I'm using CarbonData 1.3 + Spark 2.1.1 + Hadoop 2.7.1 to do repeated update operations.
Here are the steps to reproduce:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.CarbonSession._
val cc = SparkSession.builder().config(sc.getConf).getOrCreateCarbonSession("hdfs://ns1/user/ip_crm")

// create table
cc.sql("CREATE TABLE IF NOT EXISTS public.c_compact3 (id string,qqnum string,nick string,age string,gender string,auth string,qunnum string,mvcc string) STORED BY 'carbondata' TBLPROPERTIES ('SORT_COLUMNS'='id')").show

// data prepare
import org.apache.spark.sql.types._
import org.apache.spark.sql.Row
val schema = StructType(
  StructField("id", StringType, true) ::
  StructField("qqnum", StringType, true) ::
  StructField("nick", StringType, true) ::
  StructField("age", StringType, true) ::
  StructField("gender", StringType, true) ::
  StructField("auth", StringType, true) ::
  StructField("qunnum", StringType, true) ::
  StructField("mvcc", IntegerType, true) :: Nil)
val data = cc.sparkContext.parallelize(1 to 50000000, 4).map { i =>
  Row.fromSeq(Seq(i.toString, i.toString.concat("aaaaaaaa").concat(i.toString), "2009-05-27",
    i.toString.concat("c").concat(i.toString), "1", "1",
    i.toString.concat("dddddd").concat(i.toString), 1))
}
cc.createDataFrame(data, schema).createOrReplaceTempView("ddd")
cc.sql("insert into public.c_compact3 select * from ddd").show

// update the table repeatedly in a while loop
import scala.util.Random
var bcnum = 1
while (true) {
  bcnum = 1 + bcnum
  println(bcnum)
  println("111111111")
  var randomNumber = Random.nextInt(1000)
  cc.sql("DROP TABLE IF EXISTS cache_compact3").show
  cc.sql(s"cache table cache_compact3 as select * from public.c_compact3 where pmod(cast(id as int),1000)=$randomNumber").show(100, false)
  cc.sql("select count(*) from cache_compact3").show
  cc.sql("update public.c_compact3 a set (a.id,a.qqnum,a.nick,a.age,a.gender,a.auth,a.qunnum,a.mvcc)=(select b.id,b.qqnum,b.nick,b.age,b.gender,b.auth,b.qunnum,b.mvcc from cache_compact3 b where b.id=a.id)").show
  println("222222222")
  Thread.sleep(30000)
}

After about 30 iterations of the loop, a "Problem in loading segment blocks" error happened.
Then a select count on the table throws the following exception:

scala> cc.sql("select count(*) from public.c_compact3").show
18/02/25 08:49:46 AUDIT CarbonMetaStoreFactory: [hdd340][ip_crm][Thread-1]File based carbon metastore is enabled
Exchange SinglePartition
+- *HashAggregate(keys=[], functions=[partial_count(1)], output=[count#33L])
   +- *BatchedScan CarbonDatasourceHadoopRelation [ Database name :public, Table name :c_compact3, Schema :Some(StructType(StructField(id,StringType,true), StructField(qqnum,StringType,true), StructField(nick,StringType,true), StructField(age,StringType,true), StructField(gender,StringType,true), StructField(auth,StringType,true), StructField(qunnum,StringType,true), StructField(mvcc,StringType,true))) ] public.c_compact3[]

  at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:112)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
  at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
  at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:235)
  at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:141)
  at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:368)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
  at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
  at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:225)
  at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:308)
  at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
  at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2386)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
  at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2788)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2385)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2392)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2128)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2127)
  at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2818)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:2127)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2342)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:248)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:638)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:597)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:606)
  ... 50 elided
Caused by: java.io.IOException: Problem in loading segment blocks.
  at org.apache.carbondata.core.indexstore.BlockletDataMapIndexStore.getAll(BlockletDataMapIndexStore.java:153)
  at org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory.getDataMaps(BlockletDataMapFactory.java:76)
  at org.apache.carbondata.core.datamap.TableDataMap.prune(TableDataMap.java:72)
  at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getDataBlocksOfSegment(CarbonTableInputFormat.java:739)
  at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getSplits(CarbonTableInputFormat.java:666)
  at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getSplits(CarbonTableInputFormat.java:426)
  at org.apache.carbondata.spark.rdd.CarbonScanRDD.getPartitions(CarbonScanRDD.scala:96)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
  at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:91)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange$.prepareShuffleDependency(ShuffleExchange.scala:261)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange.prepareShuffleDependency(ShuffleExchange.scala:84)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:121)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:112)
  at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
  ... 83 more
Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
  at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getLocations(AbstractDFSCarbonFile.java:514)
  at org.apache.carbondata.core.indexstore.BlockletDataMapIndexStore.getAll(BlockletDataMapIndexStore.java:142)
  ... 109 more
Hi,
I tried to reproduce the issue, but it runs fine. Are you running this script on a cluster, and have you set any special configuration in carbon.properties? The script ran almost 200 times and no problem was observed.

On Sun, Feb 25, 2018 at 1:59 PM, 杨义 <[hidden email]> wrote:
> [...]
--
Thanks
Sounak
dev
1. The script runs on a YARN cluster.
2. No special configuration is set in carbon.properties, so the default configuration is used.
3. Filtering the data location, I found a deletedelta file whose size is 0 (a programmatic way to scan for such files is sketched after this message):

hdfs dfs -du -h /user/ip_crm/public/c_compact1/*/*/*/*.deletedelta | grep "0 /"
0  /user/ip_crm/public/c_compact1/Fact/Part0/Segment_1/part-0-0_batchno0-0-1519639964744.deletedelta

4. After deleting this deletedelta file the table can be selected again, but update operations then fail with "Multiple input rows matched for same row."
5. The spark-shell invocation is:

/usr/lib/spark-2.1.1-bin-hadoop2.7/bin/spark-shell \
  --driver-memory 3g \
  --executor-memory 3g \
  --executor-cores 1 \
  --jars carbondata_2.11-1.3.0-shade-hadoop2.7.2.jar \
  --driver-class-path /home/ip_crm/testdata/ojdbc14.jar \
  --queue ip_crm \
  --master yarn \
  --deploy-mode client \
  --keytab /etc/security/keytabs/ip_crm.keytab \
  --principal ip_crm \
  --files /usr/hdp/2.4.0.0-169/hadoop/conf/hdfs-site.xml \
  --conf "spark.driver.extraJavaOptions=-server -XX:+AggressiveOpts -XX:MaxMetaspaceSize=256m -XX:CompressedClassSpaceSize=512m -XX:+AlwaysPreTouch -XX:+UseG1GC -XX:+ScavengeBeforeFullGC -Djava.net.preferIPv4Stack=true -Xss16m -Dhdp.version=2.4.0.0-169 -Dcarbon.properties.filepath=/home/ip_crm/testdata/carbon.conf" \
  --conf "spark.executor.extraJavaOptions=-server -XX:+AggressiveOpts -XX:MaxMetaspaceSize=256m -XX:CompressedClassSpaceSize=512m -XX:+AlwaysPreTouch -XX:+UseG1GC -XX:+ScavengeBeforeFullGC -Djava.net.preferIPv4Stack=true -Xss16m -Dhdp.version=2.4.0.0-169 -Dcarbon.properties.filepath=/home/ip_crm/testdata/carbon.conf" \
  --conf "spark.dynamicAllocation.enabled=true" \
  --conf "spark.network.timeout=300" \
  --conf "spark.sql.shuffle.partitions=200" \
  --conf "spark.default.parallelism=200" \
  --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
  --conf "spark.kryo.referenceTracking=false" \
  --conf "spark.kryoserializer.buffer.max=1g" \
  --conf "spark.debug.maxToStringFields=1000" \
  --conf "spark.dynamicAllocation.executorIdleTimeout=30" \
  --conf "spark.dynamicAllocation.maxExecutors=30" \
  --conf "spark.dynamicAllocation.minExecutors=1" \
  --conf "spark.dynamicAllocation.sustainedSchedulerBacklogTimeout=1s" \
  --conf "spark.yarn.executor.memoryOverhead=2048" \
  --conf "spark.yarn.driver.memoryOverhead=1024" \
  --conf "spark.speculation=true" \
  --conf "spark.sql.warehouse.dir=/apps/hive/warehouse" \
  --conf "spark.rpc.askTimeout=300" \
  --conf "spark.locality.wait=0"

yixu2001

From: sounak
Date: 2018-02-26 17:04
To: dev
Subject: Re: Getting [Problem in loading segment blocks] error after doing multi update operations
[...]
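As a programmatic equivalent of the hdfs dfs -du check in step 3 above, here is a minimal sketch using plain Hadoop FileSystem APIs. The ZeroByteDeleteDeltaScanner class name and the hard-coded table path are illustrative placeholders, not part of the original report.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

// Recursively lists *.deletedelta files under a table path and prints the
// ones whose length is 0, i.e. the files suspected of breaking segment
// block loading.
public class ZeroByteDeleteDeltaScanner {
  public static void main(String[] args) throws IOException {
    // Hypothetical table path; replace with the real store location.
    Path tablePath = new Path("hdfs://ns1/user/ip_crm/public/c_compact3");
    FileSystem fs = tablePath.getFileSystem(new Configuration());
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(tablePath, true);
    while (it.hasNext()) {
      FileStatus status = it.next();
      if (status.getPath().getName().endsWith(".deletedelta") && status.getLen() == 0) {
        System.out.println("zero-byte delete delta: " + status.getPath());
      }
    }
  }
}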
In reply to this post by yixu2001
Hi
Thanks for your feedback. Let me first reproduce this issue and check the details.

Regards
Liang

yixu2001 wrote
> [...]
--
Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/
In reply to this post by yixu2001
Hi yixu2001
We have tried the same code given in the mail below, but we are not able to reproduce the problem.
<http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/file/t203/loop_count.jpg>

Please find some analysis points:

1. As per your exception, the ArrayIndexOutOfBoundsException occurs because a deletedelta file has size 0. blkLocations is empty because FileSystem.getFileBlockLocations returns an empty BlockLocation array when the file length is 0 (the file length is not greater than the requested start offset). An illustrative guard for this case is sketched after this message.

@Override public String[] getLocations() throws IOException {
  BlockLocation[] blkLocations;
  if (fileStatus instanceof LocatedFileStatus) {
    blkLocations = ((LocatedFileStatus) fileStatus).getBlockLocations();
  } else {
    FileSystem fs = fileStatus.getPath().getFileSystem(FileFactory.getConfiguration());
    blkLocations = fs.getFileBlockLocations(fileStatus.getPath(), 0L, fileStatus.getLen());
  }
  return blkLocations[0].getHosts();
}

<http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/file/t203/FileSystem.jpg>

2. A zero-byte deletedelta file should not be written in the first place. I have tried the scenario where an update/delete matches no records, and in that case a 0-byte delta file is not written.

Can you please confirm the following:
a. Is any task failure observed in the Spark UI?
b. Which update query produced the 0-byte delta file (you can add this check in your code)?
c. Can we give you a jar with extra logging (printing how many records were deleted/updated in each update/delete operation)?

Thanks
Babu

--
Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/
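To make point 1 above concrete, here is a hedged variant of the getLocations() method quoted in that message with a guard for an empty BlockLocation array. It reuses the fileStatus field and the FileFactory call exactly as they appear in the quoted snippet; it is only a sketch of where such a check would sit, not the fix adopted by the project, and it does not remove the root cause (the 0-byte delete delta file itself).

// Illustrative only: a defensive variant of the getLocations() shown above.
// For a 0-byte file getFileBlockLocations() returns an empty array, so
// indexing blkLocations[0] throws ArrayIndexOutOfBoundsException; returning
// an empty host list instead avoids the crash at this point.
@Override public String[] getLocations() throws IOException {
  BlockLocation[] blkLocations;
  if (fileStatus instanceof LocatedFileStatus) {
    blkLocations = ((LocatedFileStatus) fileStatus).getBlockLocations();
  } else {
    FileSystem fs = fileStatus.getPath().getFileSystem(FileFactory.getConfiguration());
    blkLocations = fs.getFileBlockLocations(fileStatus.getPath(), 0L, fileStatus.getLen());
  }
  if (blkLocations == null || blkLocations.length == 0) {
    // No block locations for an empty file: report no hosts instead of failing.
    return new String[0];
  }
  return blkLocations[0].getHosts();
}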
In reply to this post by Liang Chen
dev
Some log info. First of all, writing a deletedelta file creates an empty file and then does write, flush, and close; an exception during write, flush, or close leaves an empty file behind (refer to org.apache.carbondata.core.writer.CarbonDeleteDeltaWriterImpl#write(org.apache.carbondata.core.mutate.DeleteDeltaBlockDetails)). A generic way to avoid leaving such an empty file is sketched after this message.

1. As for a and b: we added logs, and the exception happens during close.

WARN DFSClient: DataStreamer Exception
org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException): No lease on /user/ip_crm/public/offer_prod_inst_rel_cab/Fact/Part0/Segment_0/part-8-4_batchno0-0-1518490201583.deletedelta (inode 1306621743): File does not exist. Holder DFSClient_NONMAPREDUCE_-754557169_117 does not have any open files.
  at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:3439)
  at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.analyzeFileState(FSNamesystem.java:3242)
  at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getNewBlockTargets(FSNamesystem.java:3080)
  at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:3040)
  at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:789)
  ...

2. As for c: yes, you can give us your jar (Spark 2.1.1 + Hadoop 2.7.2). Mail: [hidden email]

yixu2001

From: Liang Chen
Date: 2018-03-20 22:06
To: dev
Subject: Re: Getting [Problem in loading segment blocks] error after doing multi update operations
[...]
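Following up on the observation above that CarbonDeleteDeltaWriterImpl creates the target file before writing: one generic way to avoid leaving a 0-byte .deletedelta behind is to write to a temporary file and rename it only after a successful flush. The code below is a minimal sketch of that pattern with plain Hadoop APIs, assuming a hypothetical AtomicDeltaFileWriter helper; it is not the actual CarbonData writer code.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Illustrative "write to temp, rename on success" pattern: the final file
// name is only created once the content has been fully written, so a failure
// during write/flush/close cannot leave a 0-byte .deletedelta behind.
public class AtomicDeltaFileWriter {
  public static void writeDelta(Path finalPath, String content, Configuration conf)
      throws IOException {
    FileSystem fs = finalPath.getFileSystem(conf);
    // Hypothetical temp name alongside the target file.
    Path tmpPath = new Path(finalPath.getParent(), finalPath.getName() + ".tmp");
    try (FSDataOutputStream out = fs.create(tmpPath, true)) {
      out.write(content.getBytes(StandardCharsets.UTF_8));
      out.hflush();
    } catch (IOException e) {
      // Clean up the partial temp file; the final file name was never created.
      fs.delete(tmpPath, false);
      throw e;
    }
    // Only now does the .deletedelta name become visible to readers.
    if (!fs.rename(tmpPath, finalPath)) {
      fs.delete(tmpPath, false);
      throw new IOException("Could not rename " + tmpPath + " to " + finalPath);
    }
  }
}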
hi all
I am able to reproduce the same exception in my cluster. (The trace is listed below.)

------
scala> carbon.sql("select count(*) from public.c_compact4").show
2018-03-22 20:40:33,105 | WARN | main | main spark.sql.sources.options.keys expected, but read nothing | org.apache.carbondata.common.logging.impl.StandardLogService.logWarnMessage(StandardLogService.java:168)

---------------- Store location ----------------
linux-49:/opt/babu # hadoop fs -ls /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/*.deletedelta
-rw-rw-r--+  3 hdfs hive  177216 2018-03-22 18:20 /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/part-0-0_batchno0-0-1521723019528.deletedelta
-rw-r--r--   3 hdfs hive       0 2018-03-22 19:35 /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/part-0-0_batchno0-0-1521723886214.deletedelta
-rw-rw-r--+  3 hdfs hive   87989 2018-03-22 18:20 /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/part-0-1_batchno0-0-1521723019528.deletedelta
-rw-r--r--   3 hdfs hive       0 2018-03-22 19:35 /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/part-0-1_batchno0-0-1521723886214.deletedelta
-rw-rw-r--+  3 hdfs hive   87989 2018-03-22 18:20 /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/part-0-2_batchno0-0-1521723019528.deletedelta
-rw-r--r--   3 hdfs hive       0 2018-03-22 19:35 /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/part-0-2_batchno0-0-1521723886214.deletedelta
-----------------------------------------------------------

The following method/steps were used to reproduce: writing the content of the delete delta fails, but the deletedelta file itself is created successfully. The failure happens during horizontal compaction (setSpaceQuota was set in HDFS so that the file can be created successfully but writing to it fails).

org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange SinglePartition
+- *HashAggregate(keys=[], functions=[partial_count(1)], output=[count#1443L])
   +- *BatchedScan CarbonDatasourceHadoopRelation [ Database name :public, Table name :c_compact4, Schema :Some(StructType(StructField(id,StringType,true), StructField(qqnum,StringType,true), StructField(nick,StringType,true), StructField(age,StringType,true), StructField(gender,StringType,true), StructField(auth,StringType,true), StructField(qunnum,StringType,true), StructField(mvcc,StringType,true))) ] public.c_compact4[]

  at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:112)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
  at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
  at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:235)
  at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:141)
  at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:372)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
  at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
  at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:225)
  at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:308)
  at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:113)
  at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2386)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
  at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2788)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2385)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2392)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2128)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2127)
  at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2818)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:2127)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2342)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:248)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:638)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:597)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:606)
  ... 48 elided
Caused by: java.io.IOException: Problem in loading segment blocks.
  at org.apache.carbondata.core.indexstore.BlockletDataMapIndexStore.getAll(BlockletDataMapIndexStore.java:153)
  at org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory.getDataMaps(BlockletDataMapFactory.java:76)
  at org.apache.carbondata.core.datamap.TableDataMap.prune(TableDataMap.java:72)
  at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getDataBlocksOfSegment(CarbonTableInputFormat.java:739)
  at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getSplits(CarbonTableInputFormat.java:666)
  at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getSplits(CarbonTableInputFormat.java:426)
  at org.apache.carbondata.spark.rdd.CarbonScanRDD.getPartitions(CarbonScanRDD.scala:107)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
  at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:91)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange$.prepareShuffleDependency(ShuffleExchange.scala:273)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange.prepareShuffleDependency(ShuffleExchange.scala:84)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:121)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:112)
  at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
  ... 81 more
Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
  at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getLocations(AbstractDFSCarbonFile.java:509)
  at org.apache.carbondata.core.indexstore.BlockletDataMapIndexStore.getAll(BlockletDataMapIndexStore.java:142)

*Below points need to be handled to fix this issue.*
1. When horizontal compaction fails, the 0-byte delete delta file should be deleted; currently it is not. This is the cleanup part of a horizontal compaction failure.
2. A 0-byte delete delta should not be considered while reading (we can discuss this solution further); currently the tablestatus file has an entry with the deletedelta timestamp even for the 0-byte file. A sketch of such a read-side guard follows this message.
3. If a delete is in progress, the file has been created (the NameNode has an entry for it) but the data is still being written (not yet flushed), and a select query is triggered at the same time, the query will fail, so this scenario also needs to be handled.

@dev: please let me know if you need any other details.

Thanks
Babu

--
Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/
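As a rough sketch of point 2 above (ignoring 0-byte delete deltas on the read path), the helper below filters a list of delete delta paths by file length using plain Hadoop APIs. The DeleteDeltaFilter class and its method are hypothetical illustrations; an actual fix would live inside the CarbonData reader and would also need to reconcile the tablestatus entries mentioned above.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Illustrative helper: given the delete delta paths recorded for a block,
// keep only the ones that actually contain data.
public class DeleteDeltaFilter {
  public static List<Path> skipEmptyDeltas(List<Path> deltaPaths, Configuration conf)
      throws IOException {
    List<Path> usable = new ArrayList<>();
    for (Path p : deltaPaths) {
      FileSystem fs = p.getFileSystem(conf);
      // A 0-byte delete delta cannot contain any deleted-row information,
      // so it is safer to ignore it than to fail the whole query on it.
      if (fs.exists(p) && fs.getFileStatus(p).getLen() > 0) {
        usable.add(p);
      }
    }
    return usable;
  }
}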
In reply to this post by yixu2001
hi all
I am able to reproduce the same exception in my cluster (the trace is listed below).
------
scala> carbon.sql("select count(*) from public.c_compact4").show
2018-03-22 20:40:33,105 | WARN | main | main spark.sql.sources.options.keys expected, but read nothing | org.apache.carbondata.common.logging.impl.StandardLogService.logWarnMessage(StandardLogService.java:168)
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange SinglePartition
+- *HashAggregate(keys=[], functions=[partial_count(1)], output=[count#1443L])
+- *BatchedScan CarbonDatasourceHadoopRelation [ Database name :public, Table name :c_compact4, Schema :Some(StructType(StructField(id,StringType,true), StructField(qqnum,StringType,true), StructField(nick,StringType,true), StructField(age,StringType,true), StructField(gender,StringType,true), StructField(auth,StringType,true), StructField(qunnum,StringType,true), StructField(mvcc,StringType,true))) ] public.c_compact4[]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:112)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:235)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:141)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:372)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:225)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:308)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:113)
at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2386)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2788)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2385)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2392)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2128)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2127)
at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2818)
at org.apache.spark.sql.Dataset.head(Dataset.scala:2127)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2342)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:248)
at org.apache.spark.sql.Dataset.show(Dataset.scala:638)
at org.apache.spark.sql.Dataset.show(Dataset.scala:597)
at org.apache.spark.sql.Dataset.show(Dataset.scala:606)
... 48 elided
Caused by: java.io.IOException: Problem in loading segment blocks.
at org.apache.carbondata.core.indexstore.BlockletDataMapIndexStore.getAll(BlockletDataMapIndexStore.java:153)
at org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory.getDataMaps(BlockletDataMapFactory.java:76)
at org.apache.carbondata.core.datamap.TableDataMap.prune(TableDataMap.java:72)
at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getDataBlocksOfSegment(CarbonTableInputFormat.java:739)
at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getSplits(CarbonTableInputFormat.java:666)
at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getSplits(CarbonTableInputFormat.java:426)
at org.apache.carbondata.spark.rdd.CarbonScanRDD.getPartitions(CarbonScanRDD.scala:107)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:91)
at org.apache.spark.sql.execution.exchange.ShuffleExchange$.prepareShuffleDependency(ShuffleExchange.scala:273)
at org.apache.spark.sql.execution.exchange.ShuffleExchange.prepareShuffleDependency(ShuffleExchange.scala:84)
at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:121)
at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:112)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 81 more
Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getLocations(AbstractDFSCarbonFile.java:509)
at org.apache.carbondata.core.indexstore.BlockletDataMapIndexStore.getAll(BlockletDataMapIndexStore.java:142)

---------------- Store location ----------------
linux-49:/opt/babu # hadoop fs -ls /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/*.deletedelta
-rw-rw-r--+ 3 hdfs hive 177216 2018-03-22 18:20 /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/part-0-0_batchno0-0-1521723019528.deletedelta
-rw-r--r-- 3 hdfs hive 0 2018-03-22 19:35 /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/part-0-0_batchno0-0-1521723886214.deletedelta
-rw-rw-r--+ 3 hdfs hive 87989 2018-03-22 18:20 /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/part-0-1_batchno0-0-1521723019528.deletedelta
-rw-r--r-- 3 hdfs hive 0 2018-03-22 19:35 /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/part-0-1_batchno0-0-1521723886214.deletedelta
-rw-rw-r--+ 3 hdfs hive 87989 2018-03-22 18:20 /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/part-0-2_batchno0-0-1521723019528.deletedelta
-rw-r--r-- 3 hdfs hive 0 2018-03-22 19:35 /user/hive/warehouse/carbon.store/public/c_compact4/Fact/Part0/Segment_0/part-0-2_batchno0-0-1521723886214.deletedelta
-----------------------------------------------------------

How the issue was reproduced: writing the content of the delete delta fails, but the deletedelta file itself is created successfully. The failure happened during horizontal compaction (a space quota was set in HDFS with setSpaceQuota so that the file could still be created but the write to it failed).

*The following points need to be handled to fix this issue:*

1. When horizontal compaction fails, the 0-byte delete delta file should be deleted; currently it is not. This cleanup is part of handling a horizontal compaction failure.
2. A 0-byte delete delta should not be considered while reading (we can discuss this solution further). Currently the tablestatus file contains the entry for the delete delta timestamp.
3. If a delete is in progress, the file has been created (the NameNode has an entry for it) but the data is still being written (not yet flushed), and a select query is triggered at the same time, then the query will fail. This scenario also needs to be handled.

@dev: Please let me know if any other detail is needed.

Thanks
Babu

-- Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/
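For anyone who wants to recreate that state, below is a minimal Scala sketch of the reproduction idea, using the standard HDFS quota API (the programmatic equivalent of hdfs dfsadmin -setSpaceQuota). The object name, directory, and limit are hypothetical, and setting quotas requires HDFS superuser rights.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.hdfs.DistributedFileSystem
import org.apache.hadoop.hdfs.protocol.HdfsConstants

// Hypothetical helper used only to reproduce the bug: cap the space quota on the
// segment directory so HDFS still allows the .deletedelta file to be created, but the
// later write/flush of its content fails, leaving a 0-byte delta like the ones listed above.
object QuotaRepro {
  def capSpaceQuota(dir: String, limitBytes: Long, conf: Configuration): Unit = {
    val path = new Path(dir)
    val fs = path.getFileSystem(conf).asInstanceOf[DistributedFileSystem]
    // keep the namespace (file-count) quota unchanged, restrict only the space quota
    fs.setQuota(path, HdfsConstants.QUOTA_DONT_SET, limitBytes)
  }
}

Running the update loop from the original report with such a quota in place should leave 0-byte delete delta files behind once the quota is hit; the quota can be removed again with hdfs dfsadmin -clrSpaceQuota.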
dev
This issue has caused great trouble for our production. I would appreciate it if you could let me know whether there is a plan to fix it.

yixu2001

From: BabuLal
Date: 2018-03-23 00:20
To: dev
Subject: Re: Getting [Problem in loading segment blocks] error after doing multi update operations
Hi
We have already arranged to fix this issue and will raise the pull request as soon as possible. Thanks for your feedback.

Regards
Liang

yixu2001 wrote
> dev
> This issue has caused great trouble for our production. I would appreciate
> it if you could let me know whether there is a plan to fix it.
>
> yixu2001

-- Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/
dev
Thanks.

yixu2001

From: Liang Chen
Date: 2018-03-23 16:36
To: dev
Subject: Re: Re: Getting [Problem in loading segment blocks] error after doing multi update operations

Hi
We have already arranged to fix this issue and will raise the pull request as soon as possible. Thanks for your feedback.

Regards
Liang

-- Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/
Hi
The issue is fixed and a PR has been raised.

1. PR: https://github.com/apache/carbondata/pull/2097
2. The following situations are handled in the PR:
   a. Skip 0-byte deletedelta files while reading.
   b. Previously, if HDFS threw any error on OutputStream close/flush (space quota exceeded, no lease, etc.), the exception was not propagated to the caller, so the delete appeared successful even though it had actually failed. Now this is handled and the exception is thrown to the caller.
3. Since I simulated the issue using a space quota, could you please share your full executor logs (the ones containing the exception from HDFS) so we can make sure that exception is handled by the fix?

Thanks
Babu

-- Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/
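Point 2(b) is essentially about not swallowing failures on the output stream. A small Scala sketch of that pattern is below; the object and method names are hypothetical and this is not the code in the PR, only an illustration of the behaviour described above.

import java.io.{IOException, OutputStream}

object DeltaWriteSketch {
  // Write and flush the delete delta content and make sure any HDFS error
  // (space quota exceeded, lost lease, ...) reaches the caller, so the operation can be
  // marked as failed instead of silently leaving a 0-byte file behind.
  def writeDeleteDelta(out: OutputStream, content: Array[Byte]): Unit = {
    var failed = false
    try {
      out.write(content)
      out.flush()
    } catch {
      case e: IOException =>
        failed = true
        throw e // propagate the write/flush failure instead of hiding it
    } finally {
      try out.close()
      catch {
        case e: IOException =>
          // close() itself can fail; rethrow unless an earlier error is already propagating
          if (!failed) throw e
      }
    }
  }
}

With the failure surfaced, the caller can delete the partially written file and fail the update, which lines up with the cleanup described in point 1 of the earlier analysis.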
Hi yixu2001
Can you please verify your issue with PR https://github.com/apache/carbondata/pull/2097? The PR is against branch 1.3, since you are using CarbonData 1.3. Let me know if the issue still exists.

Thanks
Babu

-- Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/