dev

During execution of an "update" statement, an interruption occurred. After that, the "select" statement failed.
Sometimes the "select" statement recovers and succeeds, but sometimes it cannot recover.

The error information is as follows:

"scala> cc.sql("select * from qqdata2.oc_indextest where id = '1999100000'").show(100,false);
java.lang.NullPointerException
  at org.apache.carbondata.hadoop.CarbonInputFormat.getDataBlocksOfSegment(CarbonInputFormat.java:381)
  at org.apache.carbondata.hadoop.CarbonInputFormat.getSplits(CarbonInputFormat.java:316)
  at org.apache.carbondata.hadoop.CarbonInputFormat.getSplits(CarbonInputFormat.java:262)
  at org.apache.carbondata.spark.rdd.CarbonScanRDD.getPartitions(CarbonScanRDD.scala:81)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
  at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:311)
  at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
  at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2378)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
  at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2780)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2377)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2384)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2120)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2119)
  at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2810)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:2119)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2334)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:248)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:640)
  ... 50 elided"

yixu2001
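For triage: the NullPointerException is thrown while CarbonInputFormat collects the data blocks of each segment (getDataBlocksOfSegment), so the interrupted update may have left the table's segment or update-status metadata inconsistent. A minimal check from the same spark-shell session (a sketch, assuming the same CarbonContext cc as in the report; SHOW SEGMENTS is standard CarbonData DDL):

  // List the table's segments and their load status; a segment left behind by
  // the interrupted update may appear here with an unexpected status.
  cc.sql("SHOW SEGMENTS FOR TABLE qqdata2.oc_indextest").show(100, false)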
Hi
Can you provide the full script? What is your update script? How do you reproduce it?

Regards
Liang

yixu2001 wrote
> dev
>
> During execution of an "update" statement, an interruption occurred.
> After that, the "select" statement failed.
> Sometimes the "select" statement recovers and succeeds, but sometimes it
> cannot recover.
dev
step 1: create the table:

  cc.sql("CREATE TABLE IF NOT EXISTS qqdata2.oc_indextest (id STRING, CUST_ORDER_ID STRING, ORDER_ITEM_IDATTR_ID STRING, TTR_VALUE_IDATTR_VALUE STRING, CREATE_DATE STRING, UPDATE_DATE STRING, STATUS_CD STRING, STATUS_DATE STRING, AREA_ID STRING, REGION_CD STRING, UPDATE_STAFF STRING, CREATE_STAFF STRING, SHARDING_ID STRING, ORDER_ATTR_ID STRING) STORED BY 'carbondata'")

step 2: insert data:

  cc.sql("insert into qqdata2.oc_indextest select * from qqdata2.h_indextest").show(100, false)

step 3: call a Scala program that updates the table continuously. Code segment as follows (a self-contained version with assumed placeholder definitions follows this message):

  // Split the id range into batches of `step` ids and update each batch in turn.
  lazy val list = (start to end).sliding(step, step).toList
  for (i <- 0 to list.length - 1) {
    // Quote the ids of this batch for the IN (...) predicate.
    lazy val temp1 = list(i).mkString("\"", "\",\"", "\"")
    // Build the 13 values for the SET clause; Str1 is the value separator.
    lazy val msg = rnd.nextInt(seed) % (seed) + Str1 +
      rnd.nextInt(seed) % (seed) + Str1 +
      (i + 4) + Str1 +
      "20160709" + rnd.nextInt(seed) % (seed) + Str1 +
      "20160710" + rnd.nextInt(seed) % (seed) + Str1 +
      "1299" + Str1 +
      "20160711" + rnd.nextInt(seed) % (seed) + Str1 +
      "10" + Str1 + "73" + Str1 + "302063" + Str1 + "302064" + Str1 +
      rnd.nextInt(seed) % (seed) + Str1 + "44444444"
    cc.sql(s"update qqdata2.$table set (CUST_ORDER_ID,ORDER_ITEM_IDATTR_ID,TTR_VALUE_IDATTR_VALUE,CREATE_DATE,UPDATE_DATE,STATUS_CD,STATUS_DATE,AREA_ID,REGION_CD,UPDATE_STAFF,CREATE_STAFF,SHARDING_ID,ORDER_ATTR_ID) = ($msg) where id in ($temp1)").show
    cc.sql(s"select * from qqdata2.$table a, qqdata2.h_indextest1 b where a.id = b.id").show
  }

step 4: interrupt the update program while it is running.

step 5: query the data:

  cc.sql("select * from qqdata2.oc_indextest where id = '1999100000'").show(100, false)

Sometimes the query fails.

yixu2001

From: Liang Chen
Date: 2017-10-16 19:31
To: dev
Subject: Re: Query failed after "update" statement interrupted

Hi

Can you provide the full script? What is your update script? How do you reproduce it?

Regards
Liang
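The step 3 fragment above references several values it never defines (start, end, step, seed, Str1, rnd, table). For anyone attempting to reproduce, here is a self-contained sketch of the same loop; every value marked "assumed" is a placeholder chosen for illustration, and the quoting of the SET values is an interpretation of the original fragment rather than the poster's exact script:

  import scala.util.Random

  val table = "oc_indextest"                    // assumed: the table under test
  val (start, end) = (1999000000L, 1999200000L) // assumed: an id range present in the table
  val step = 1000                               // assumed: ids updated per batch
  val seed = 10000                              // assumed: bound for the random values
  val rnd  = new Random()

  val batches = (start to end).sliding(step, step).toList
  for (i <- batches.indices) {
    // Quote the ids of this batch for the IN (...) predicate.
    val temp1 = batches(i).map(id => s"'$id'").mkString(",")
    // Build the 13 values for the SET clause, one per updated column.
    val values = Seq(
      rnd.nextInt(seed).toString,      // CUST_ORDER_ID
      rnd.nextInt(seed).toString,      // ORDER_ITEM_IDATTR_ID
      (i + 4).toString,                // TTR_VALUE_IDATTR_VALUE
      "20160709" + rnd.nextInt(seed),  // CREATE_DATE
      "20160710" + rnd.nextInt(seed),  // UPDATE_DATE
      "1299",                          // STATUS_CD
      "20160711" + rnd.nextInt(seed),  // STATUS_DATE
      "10",                            // AREA_ID
      "73",                            // REGION_CD
      "302063",                        // UPDATE_STAFF
      "302064",                        // CREATE_STAFF
      rnd.nextInt(seed).toString,      // SHARDING_ID
      "44444444"                       // ORDER_ATTR_ID
    ).map(v => s"'$v'").mkString(",")
    cc.sql(s"update qqdata2.$table set (CUST_ORDER_ID,ORDER_ITEM_IDATTR_ID," +
      s"TTR_VALUE_IDATTR_VALUE,CREATE_DATE,UPDATE_DATE,STATUS_CD,STATUS_DATE," +
      s"AREA_ID,REGION_CD,UPDATE_STAFF,CREATE_STAFF,SHARDING_ID,ORDER_ATTR_ID)" +
      s" = ($values) where id in ($temp1)").show
  }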