dev
environment: spark 2.1.1, carbondata 1.1.1, hadoop 2.7.2

run:
ALTER table e_carbon.prod_inst_all_c COMPACT 'MAJOR'
CLEAN FILES FOR TABLE e_carbon.prod_inst_all_c

17/10/30 14:59:21 ERROR filesystem.AbstractDFSCarbonFile: main Exception occurred:File does not exist: hdfs://ns1/user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/624_batchno0-0-1509233731459.carbonindex

623_batchno0-0-1509233118616.carbonindex and 625_batchno0-0-1509233731459.carbonindex are still present, but 624_batchno0-0-1509233731459.carbonindex between them is lost:

-rw-r--r-- 3 e_carbon e_carbon_group 6750 2017-10-29 07:17 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/621_batchno0-0-1509231670521.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 11320 2017-10-29 07:19 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/622_batchno0-0-1509232641994.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 6858 2017-10-29 07:35 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/623_batchno0-0-1509233118616.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 11423 2017-10-29 07:37 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/625_batchno0-0-1509233731459.carbonindex

scala> cc.sql("select his_id,count(*) from e_carbon.prod_inst_his_c group by his_id having count(*)>1").show
17/10/30 14:59:21 ERROR filesystem.AbstractDFSCarbonFile: main Exception occurred:File does not exist: hdfs://ns1/user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/624_batchno0-0-1509233731459.carbonindex
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(his_id#1818, 100)
+- *HashAggregate(keys=[his_id#1818], functions=[partial_count(1), partial_count(1)], output=[his_id#1818, count#1967L, count#1968L])
   +- *BatchedScan CarbonDatasourceHadoopRelation [ Database name :e_carbon, Table name :prod_inst_his_c, Schema :Some(StructType(StructField(his_id,StringType,true), StructField(ETL_date,StringType,true), StructField(prod_inst_id,StringType,true), StructField(owner_cust_id,StringType,true), StructField(acc_prod_inst_id,StringType,true), StructField(DVERSION,StringType,true), StructField(GTID,StringType,true), StructField(IND,StringType,true), StructField(ODS_STATE,StringType,true), StructField(SRC,StringType,true), StructField(kafka_date,StringType,true), StructField(product_id,StringType,true), StructField(address_id,StringType,true), StructField(payment_mode_cd,StringType,true), StructField(product_password,StringType,true), StructField(important_level,StringType,true), StructField(area_code,StringType,true), StructField(acc_nbr,StringType,true), StructField(exch_id,StringType,true), StructField(common_region_id,StringType,true), StructField(remark,StringType,true), StructField(pay_cycle,StringType,true), StructField(begin_rent_time,StringType,true), StructField(stop_rent_time,StringType,true), StructField(finish_time,StringType,true), StructField(stop_status,StringType,true), StructField(status_cd,StringType,true), StructField(create_date,StringType,true), StructField(status_date,StringType,true), StructField(update_date,StringType,true), StructField(proc_serial,StringType,true), StructField(use_cust_id,StringType,true), StructField(ext_prod_inst_id,StringType,true), StructField(address_desc,StringType,true), StructField(area_id,StringType,true), StructField(update_staff,StringType,true), StructField(create_staff,StringType,true),
StructField(rec_update_date,StringType,true), StructField(account,StringType,true), StructField(version,StringType,true), StructField(community_id,StringType,true), StructField(ext_acc_prod_inst_id,StringType,true), StructField(distributor_id,StringType,true), StructField(sharding_id,StringType,true))) ] e_carbon.prod_inst_his_c[his_id#1818] at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56) at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:112) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113) at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:235) at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:141) at org.apache.spark.sql.execution.FilterExec.inputRDDs(basicPhysicalOperators.scala:124) at org.apache.spark.sql.execution.ProjectExec.inputRDDs(basicPhysicalOperators.scala:42) at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:368) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113) at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:225) at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:308) at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38) at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2386) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57) at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2788) at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2385) at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2392) at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2128) at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2127) at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2818) at org.apache.spark.sql.Dataset.head(Dataset.scala:2127) at org.apache.spark.sql.Dataset.take(Dataset.scala:2342) at org.apache.spark.sql.Dataset.showString(Dataset.scala:248) at org.apache.spark.sql.Dataset.show(Dataset.scala:638) at org.apache.spark.sql.Dataset.show(Dataset.scala:597) at org.apache.spark.sql.Dataset.show(Dataset.scala:606) ... 50 elided Caused by: java.io.IOException: Problem in loading segment block. 
at org.apache.carbondata.core.datastore.SegmentTaskIndexStore.get(SegmentTaskIndexStore.java:102) at org.apache.carbondata.core.datastore.SegmentTaskIndexStore.get(SegmentTaskIndexStore.java:54) at org.apache.carbondata.hadoop.CacheAccessClient.get(CacheAccessClient.java:67) at org.apache.carbondata.hadoop.CarbonInputFormat.getSegmentAbstractIndexs(CarbonInputFormat.java:543) at org.apache.carbondata.hadoop.CarbonInputFormat.getDataBlocksOfSegment(CarbonInputFormat.java:382) at org.apache.carbondata.hadoop.CarbonInputFormat.getSplits(CarbonInputFormat.java:321) at org.apache.carbondata.hadoop.CarbonInputFormat.getSplits(CarbonInputFormat.java:262) at org.apache.carbondata.spark.rdd.CarbonScanRDD.getPartitions(CarbonScanRDD.scala:81) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) at scala.Option.getOrElse(Option.scala:121) at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) at scala.Option.getOrElse(Option.scala:121) at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250) at scala.Option.getOrElse(Option.scala:121) at org.apache.spark.rdd.RDD.partitions(RDD.scala:250) at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:91) at org.apache.spark.sql.execution.exchange.ShuffleExchange$.prepareShuffleDependency(ShuffleExchange.scala:261) at org.apache.spark.sql.execution.exchange.ShuffleExchange.prepareShuffleDependency(ShuffleExchange.scala:84) at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:121) at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:112) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52) ... 85 more Caused by: java.lang.NullPointerException at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getSize(AbstractDFSCarbonFile.java:113) at org.apache.carbondata.core.util.CarbonUtil.calculateDriverBTreeSize(CarbonUtil.java:1009) at org.apache.carbondata.core.datastore.SegmentTaskIndexStore.calculateRequiredSize(SegmentTaskIndexStore.java:304) at org.apache.carbondata.core.datastore.SegmentTaskIndexStore.loadAndGetTaskIdToSegmentsMap(SegmentTaskIndexStore.java:236) at org.apache.carbondata.core.datastore.SegmentTaskIndexStore.get(SegmentTaskIndexStore.java:96) ... 
112 more

[e_carbon@HETL032 testdata]$ hdfs dfs -ls /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/*.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 44859 2017-10-25 14:53 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/0_batchno0-0-1508912751699.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 43893 2017-10-25 14:55 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/1_batchno0-0-1508912751699.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 42684 2017-10-25 14:55 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/2_batchno0-0-1508912751699.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 40751 2017-10-25 14:43 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/3_batchno0-0-1508912751699.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 40212 2017-10-25 14:54 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/4_batchno0-0-1508912751699.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 11249 2017-10-29 06:37 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/618_batchno0-0-1509230160810.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 7197 2017-10-29 06:53 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/619_batchno0-0-1509230585040.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 11516 2017-10-29 06:55 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/620_batchno0-0-1509231215665.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 6750 2017-10-29 07:17 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/621_batchno0-0-1509231670521.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 11320 2017-10-29 07:19 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/622_batchno0-0-1509232641994.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 6858 2017-10-29 07:35 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/623_batchno0-0-1509233118616.carbonindex
-rw-r--r-- 3 e_carbon e_carbon_group 11423 2017-10-29 07:37 /user/e_carbon/public/carbon.store/e_carbon/prod_inst_his_c/Fact/Part0/Segment_0/625_batchno0-0-1509233731459.carbonindex

yixu2001
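For reference, the full sequence that leads to the error can be replayed in a single spark-shell session. This is only a sketch based on the commands already quoted above; cc is assumed to be the Carbon-enabled SQL context used throughout this thread.

// Sketch of the reported sequence; assumes cc is the existing Carbon-enabled context.
cc.sql("ALTER TABLE e_carbon.prod_inst_all_c COMPACT 'MAJOR'")
cc.sql("CLEAN FILES FOR TABLE e_carbon.prod_inst_all_c")
// Any query that has to rebuild the segment index then fails with
// "File does not exist ... 624_batchno0-0-1509233731459.carbonindex":
cc.sql("select his_id, count(*) from e_carbon.prod_inst_his_c group by his_id having count(*) > 1").show()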
Hi
Yes, I checked the log message; it does look like there is an issue. Can you share the steps to reproduce it: how many machines did you use for the data load, and how many times did you load?

Regards
Liang
dev
We ran multiple updates, which generated about ten thousand delta files:

cc.sql("update e_carbon.prod_inst_his_c A set (a.ETL_date,a.prod_inst_id,a.OWNER_CUST_ID,a.ACC_PROD_INST_ID,a.DVERSION,a.GTID,a.IND,a.ODS_STATE,A.SRC,a.kafka_date,a.PRODUCT_ID,a.ADDRESS_ID,a.PAYMENT_MODE_CD,a.PRODUCT_PASSWORD,a.IMPORTANT_LEVEL,a.AREA_CODE,a.ACC_NBR,a.EXCH_ID,a.COMMON_REGION_ID,a.REMARK,a.PAY_CYCLE,a.BEGIN_RENT_TIME,a.STOP_RENT_TIME,a.FINISH_TIME,a.STOP_STATUS,a.STATUS_CD,a.CREATE_DATE,a.STATUS_DATE,a.UPDATE_DATE,a.PROC_SERIAL,a.USE_CUST_ID,a.EXT_PROD_INST_ID,a.ADDRESS_DESC,a.AREA_ID,a.UPDATE_STAFF,a.CREATE_STAFF,a.REC_UPDATE_DATE,a.ACCOUNT,a.VERSION,a.COMMUNITY_ID,a.EXT_ACC_PROD_INST_ID,a.DISTRIBUTOR_ID,a.SHARDING_ID)=(select b.etl_date,b.prod_inst_id,b.OWNER_CUST_ID,b.ACC_PROD_INST_ID,B.DVERSION,b.GTID,b.IND,B.ODS_STATE,B.SRC,b.kafka_date,b.PRODUCT_ID,b.ADDRESS_ID,b.PAYMENT_MODE_CD,b.PRODUCT_PASSWORD,b.IMPORTANT_LEVEL,b.AREA_CODE,b.ACC_NBR,b.EXCH_ID,b.COMMON_REGION_ID,b.REMARK,b.PAY_CYCLE,b.BEGIN_RENT_TIME,b.STOP_RENT_TIME,b.FINISH_TIME,b.STOP_STATUS,b.STATUS_CD,b.CREATE_DATE,b.STATUS_DATE,b.UPDATE_DATE,b.PROC_SERIAL,b.USE_CUST_ID,b.EXT_PROD_INST_ID,b.ADDRESS_DESC,b.AREA_ID,b.UPDATE_STAFF,b.CREATE_STAFF,b.REC_UPDATE_DATE,b.ACCOUNT,b.VERSION,b.COMMUNITY_ID,b.EXT_ACC_PROD_INST_ID,b.DISTRIBUTOR_ID,b.SHARDING_ID from cache_prod_inst_his_u b where a.his_id=b.his_id)").show;

yixu2001
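If thousands of update delta files are accumulating, it may be worth confirming that horizontal (IUD) compaction is enabled and what its thresholds are before running the MAJOR compaction. A minimal sketch follows; the property names are taken from the CarbonData 1.1.x documentation as I remember it and should be verified against your carbon.properties before relying on them.

// Sketch only: property names are assumed from CarbonData 1.1.x docs -- verify before use.
import org.apache.carbondata.core.util.CarbonProperties

val props = CarbonProperties.getInstance()
props.addProperty("carbon.horizontal.compaction.enable", "true")          // merge update/delete deltas after each UPDATE
props.addProperty("carbon.horizontal.update.compaction.threshold", "1")   // update delta files per block before merging
props.addProperty("carbon.horizontal.delete.compaction.threshold", "1")   // delete delta files per block before merging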
dev
When is a patch expected to be released? We can test it.

yixu2001
Hi,
Do you have a more definite test case for this. I tried this in-house but Update -> (Vertical Compacion) -> CLEAN. But this is not getting reproduce. Can you send the logs for Update? Wanted to check if Horizontal Compaction is occuring or not. In our in-house test data Horizontal compaction is running after every Update. On Tue, Nov 7, 2017 at 1:24 PM, yixu2001 <[hidden email]> wrote: > dev > What time is expected to release a patch, we can test > > > yixu2001 > > From: yixu2001 > Date: 2017-11-07 15:14 > To: dev > Subject: Re: Re: After MAJOR index lost > dev > run Multiple updates Generate ten thousand delta file > cc.sql("update e_carbon.prod_inst_his_c A set > (a.ETL_date,a.prod_inst_id,a.OWNER_CUST_ID,a.ACC_PROD_INST_ > ID,a.DVERSION,a.GTID,a.IND,a.ODS_STATE,A.SRC,a.kafka_date, > a.PRODUCT_ID,a.ADDRESS_ID,a.PAYMENT_MODE_CD,a.PRODUCT_ > PASSWORD,a.IMPORTANT_LEVEL,a.AREA_CODE,a.ACC_NBR,a.EXCH_ID, > a.COMMON_REGION_ID,a.REMARK,a.PAY_CYCLE,a.BEGIN_RENT_TIME,a. > STOP_RENT_TIME,a.FINISH_TIME,a.STOP_STATUS,a.STATUS_CD,a. > CREATE_DATE,a.STATUS_DATE,a.UPDATE_DATE,a.PROC_SERIAL,a. > USE_CUST_ID,a.EXT_PROD_INST_ID,a.ADDRESS_DESC,a.AREA_ID,a. > UPDATE_STAFF,a.CREATE_STAFF,a.REC_UPDATE_DATE,a.ACCOUNT,a. > VERSION,a.COMMUNITY_ID,a.EXT_ACC_PROD_INST_ID,a. > DISTRIBUTOR_ID,a.SHARDING_ID)=(select b.etl_date,b.prod_inst_id,b. > OWNER_CUST_ID,b.ACC_PROD_INST_ID,B.DVERSION,b.GTID,b.IND,B. > ODS_STATE,B.SRC,b.kafka_date,b.PRODUCT_ID,b.ADDRESS_ID,b. > PAYMENT_MODE_CD,b.PRODUCT_PASSWORD,b.IMPORTANT_LEVEL,b. > AREA_CODE,b.ACC_NBR,b.EXCH_ID,b.COMMON_REGION_ID,b.REMARK,b. > PAY_CYCLE,b.BEGIN_RENT_TIME,b.STOP_RENT_TIME,b.FINISH_TIME, > b.STOP_STATUS,b.STATUS_CD,b.CREATE_DATE,b.STATUS_DATE,b. > UPDATE_DATE,b.PROC_SERIAL,b.USE_CUST_ID,b.EXT_PROD_INST_ > ID,b.ADDRESS_DESC,b.AREA_ID,b.UPDATE_STAFF,b.CREATE_STAFF,b. > REC_UPDATE_DATE,b.ACCOUNT,b.VERSION,b.COMMUNITY_ID,b.EXT_ > ACC_PROD_INST_ID,b.DISTRIBUTOR_ID,b.SHARDING_ID from > cache_prod_inst_his_u b where a.his_id=b.his_id)").show; > > > yixu2001 > > From: Liang Chen > Date: 2017-11-02 02:29 > To: dev > Subject: Re: After MAJOR index lost > Hi > > Yes, checked the log message, looks have some issues. > Can you share the reproduce steps: > Did you use how many machines to do data load, and load how many times? 
>
> Regards
> Liang
>
> yixu2001 wrote
> > [original error report, stack traces, and carbonindex file listing quoted in full]
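Below is a minimal sketch, in spark-shell Scala as used in this thread, of the kind of sequence being discussed (repeated Update, then MAJOR compaction and CLEAN FILES). It assumes a CarbonSession named cc as in the thread; the table repro_t, its columns, and the loop count are illustrative assumptions only, not the exact in-house test script.

// Hypothetical reproduction sketch, run from spark-shell with a CarbonSession `cc`.
// Table/column names and the loop count are assumptions for illustration.
cc.sql("CREATE TABLE IF NOT EXISTS repro_t (id STRING, name STRING) STORED BY 'carbondata'")
cc.sql("INSERT INTO repro_t SELECT '1', 'v0'")              // seed one row
for (i <- 1 to 100) {
  // every UPDATE writes new delta files into the segment directory;
  // horizontal compaction is expected to merge them after each UPDATE
  cc.sql(s"UPDATE repro_t SET (name) = ('v$i') WHERE id = '1'")
}
cc.sql("ALTER TABLE repro_t COMPACT 'MAJOR'")               // vertical (MAJOR) compaction
cc.sql("CLEAN FILES FOR TABLE repro_t")
cc.sql("SHOW SEGMENTS FOR TABLE repro_t").show(false)       // segment status after clean
cc.sql("SELECT count(*) FROM repro_t").show                 // should still scan without a missing-carbonindex error

Whether horizontal compaction actually ran after each update can be checked by listing the remaining delta files under the segment directory (hdfs dfs -ls <carbon.store>/<db>/repro_t/Fact/Part0/Segment_0) once the loop finishes, and from the update/compaction entries in the driver log.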
--
Thanks
Sounak