Getting [Problem in loading segment blocks] error after doing multi update operations

Posted by yixu2001 on Feb 25, 2018; 8:29am
URL: http://apache-carbondata-dev-mailing-list-archive.168.s1.nabble.com/Getting-Problem-in-loading-segment-blocks-error-after-doing-multi-update-operations-tp40249.html

I'm using CarbonData 1.3 + Spark 2.1.1 + Hadoop 2.7.1 to perform multiple update operations.
Here are the steps to reproduce:

// create a CarbonSession against the HDFS store path
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.CarbonSession._
val cc = SparkSession.builder().config(sc.getConf).getOrCreateCarbonSession("hdfs://ns1/user/ip_crm")
// create table
cc.sql("CREATE TABLE IF NOT EXISTS public.c_compact3 (id string,qqnum string,nick string,age string,gender string,auth string,qunnum string,mvcc string) STORED BY 'carbondata' TBLPROPERTIES ('SORT_COLUMNS'='id')").show;
// prepare and insert 50,000,000 rows of test data
import org.apache.spark.sql.types._
import org.apache.spark.sql.Row
// note: mvcc is IntegerType here, while the table column is string; the insert below casts it (see StringType in the query plan further down)
val schema = StructType(StructField("id",StringType,true)::StructField("qqnum",StringType,true)::StructField("nick",StringType,true)::StructField("age",StringType,true)::StructField("gender",StringType,true)::StructField("auth",StringType,true)::StructField("qunnum",StringType,true)::StructField("mvcc",IntegerType,true)::Nil)
val data = cc.sparkContext.parallelize(1 to 50000000, 4).map { i =>
  Row.fromSeq(Seq(i.toString, i.toString.concat("aaaaaaaa").concat(i.toString), "2009-05-27", i.toString.concat("c").concat(i.toString), "1", "1", i.toString.concat("dddddd").concat(i.toString), 1))
}
cc.createDataFrame(data, schema).createOrReplaceTempView("ddd")
cc.sql("insert into public.c_compact3 select * from ddd").show;

// update the table many times in a while loop
import scala.util.Random
var bcnum = 1
while (true) {
  bcnum = bcnum + 1
  println(bcnum)
  println("111111111")  // progress marker: iteration start
  val randomNumber = Random.nextInt(1000)
  // cache a random 1/1000 slice of the table, then update those rows in place
  cc.sql("DROP TABLE IF EXISTS cache_compact3").show;
  cc.sql(s"cache table cache_compact3 as select * from public.c_compact3 where pmod(cast(id as int),1000)=$randomNumber").show(100, false);
  cc.sql("select count(*) from cache_compact3").show;
  cc.sql("update public.c_compact3 a set (a.id,a.qqnum,a.nick,a.age,a.gender,a.auth,a.qunnum,a.mvcc)=(select b.id,b.qqnum,b.nick,b.age,b.gender,b.auth,b.qunnum,b.mvcc from cache_compact3 b where b.id=a.id)").show;
  println("222222222")  // progress marker: iteration end
  Thread.sleep(30000)
}
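Each update writes delta files against the existing segments, so segment metadata grows with every iteration. To watch this while the loop runs, the segment state can be inspected from a second session; a sketch using CarbonData's SHOW SEGMENTS and CLEAN FILES commands (output columns vary by version, and CLEAN FILES mutates the store, so use it only on a test table):

// run from a second session while the loop is active
cc.sql("SHOW SEGMENTS FOR TABLE public.c_compact3").show(100, false);
// optionally clear stale files left behind by updates
cc.sql("CLEAN FILES FOR TABLE public.c_compact3").show;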

After about 30 iterations, the [Problem in loading segment blocks] error occurred.
Afterwards, any select count on the table fails with the following exception:

scala> cc.sql("select count(*) from public.c_compact3").show;
18/02/25 08:49:46 AUDIT CarbonMetaStoreFactory: [hdd340][ip_crm][Thread-1]File based carbon metastore is enabled
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange SinglePartition
+- *HashAggregate(keys=[], functions=[partial_count(1)], output=[count#33L])
   +- *BatchedScan CarbonDatasourceHadoopRelation [ Database name :public, Table name :c_compact3, Schema :Some(StructType(StructField(id,StringType,true), StructField(qqnum,StringType,true), StructField(nick,StringType,true), StructField(age,StringType,true), StructField(gender,StringType,true), StructField(auth,StringType,true), StructField(qunnum,StringType,true), StructField(mvcc,StringType,true))) ] public.c_compact3[]

  at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:112)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
  at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
  at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:235)
  at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:141)
  at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:368)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
  at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
  at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:225)
  at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:308)
  at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
  at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2386)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
  at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2788)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2385)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2392)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2128)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2127)
  at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2818)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:2127)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2342)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:248)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:638)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:597)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:606)
  ... 50 elided
Caused by: java.io.IOException: Problem in loading segment blocks.
  at org.apache.carbondata.core.indexstore.BlockletDataMapIndexStore.getAll(BlockletDataMapIndexStore.java:153)
  at org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory.getDataMaps(BlockletDataMapFactory.java:76)
  at org.apache.carbondata.core.datamap.TableDataMap.prune(TableDataMap.java:72)
  at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getDataBlocksOfSegment(CarbonTableInputFormat.java:739)
  at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getSplits(CarbonTableInputFormat.java:666)
  at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getSplits(CarbonTableInputFormat.java:426)
  at org.apache.carbondata.spark.rdd.CarbonScanRDD.getPartitions(CarbonScanRDD.scala:96)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
  at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:91)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange$.prepareShuffleDependency(ShuffleExchange.scala:261)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange.prepareShuffleDependency(ShuffleExchange.scala:84)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:121)
  at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:112)
  at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
  ... 83 more
Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
  at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.getLocations(AbstractDFSCarbonFile.java:514)
  at org.apache.carbondata.core.indexstore.BlockletDataMapIndexStore.getAll(BlockletDataMapIndexStore.java:142)
  ... 109 more
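
The innermost failure is AbstractDFSCarbonFile.getLocations() hitting ArrayIndexOutOfBoundsException: 0, which suggests a carbonindex file in the store reports no block locations, i.e. it is zero-length or was removed mid-read after the repeated updates. A diagnostic sketch to look for such files; the table path below is an assumption derived from the store path used above (typical layout <store>/<db>/<table>), so adjust it to your deployment:

// list carbonindex files under the table path and flag zero-length ones
import org.apache.hadoop.fs.{FileSystem, Path}
val tablePath = new Path("hdfs://ns1/user/ip_crm/public/c_compact3")  // assumed layout
val fs = tablePath.getFileSystem(cc.sparkContext.hadoopConfiguration)
val it = fs.listFiles(tablePath, true)
while (it.hasNext) {
  val f = it.next()
  if (f.getPath.getName.endsWith(".carbonindex") && f.getLen == 0)
    println(s"zero-length index file: ${f.getPath}")
}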