[ https://issues.apache.org/jira/browse/CARBONDATA-1551?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16223994#comment-16223994 ]

xubo245 commented on CARBONDATA-1551:
-------------------------------------

When compacting a table that has an ARRAY<STRING> complex data type column, there is also an error:

{code:java}
java.lang.IndexOutOfBoundsException: Index: 0, Size: 0
  at java.util.ArrayList.rangeCheck(ArrayList.java:653)
  at java.util.ArrayList.get(ArrayList.java:429)
  at org.apache.carbondata.core.datastore.block.SegmentProperties.assignComplexOrdinal(SegmentProperties.java:473)
  at org.apache.carbondata.core.datastore.block.SegmentProperties.fillDimensionAndMeasureDetails(SegmentProperties.java:398)
  at org.apache.carbondata.core.datastore.block.SegmentProperties.<init>(SegmentProperties.java:174)
  at org.apache.carbondata.spark.rdd.CarbonMergerRDD$$anon$1.<init>(CarbonMergerRDD.scala:160)
  at org.apache.carbondata.spark.rdd.CarbonMergerRDD.internalCompute(CarbonMergerRDD.scala:78)
  at org.apache.carbondata.spark.rdd.CarbonRDD.compute(CarbonRDD.scala:60)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  at org.apache.spark.scheduler.Task.run(Task.scala:99)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
  at java.lang.Thread.run(Thread.java:748)
{code}

The test code is:

{code:java}
sql(
  """
    | CREATE TABLE big_array(
    | list ARRAY<STRING>
    | )
    | STORED BY 'carbondata'
  """.stripMargin)
sql(
  s"""
     | LOAD DATA LOCAL INPATH '${file.getAbsolutePath}'
     | INTO TABLE big_array
     | OPTIONS ('header'='false')
   """.stripMargin)
sql(
  s"""
     | LOAD DATA LOCAL INPATH '${file.getAbsolutePath}'
     | INTO TABLE big_array
     | OPTIONS ('header'='false')
   """.stripMargin)
checkAnswer(
  sql("select count(*) from big_array"),
  Row(66000)
)
val exception_compaction: Exception = intercept[Exception] {
  sql("alter table big_array compact 'major'")
}
{code}

Analysis: during compaction, the ARRAY<STRING> column is represented by only one dimension column, but normally it should be two (the parent array dimension plus the child element dimension), so the complex-ordinal lookup in SegmentProperties.assignComplexOrdinal indexes past the end of the list.
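To illustrate the analysis, here is a rough sketch in Scala; the {{Dimension}} model below is hypothetical and simplified, not CarbonData's actual SegmentProperties API:

{code:java}
// Hypothetical, simplified model -- NOT CarbonData's real SegmentProperties API.
// A complex column such as ARRAY<STRING> is expected to expand into a parent
// dimension plus at least one child dimension for the elements.
case class Dimension(name: String, dataType: String, numberOfChildren: Int)

// Correct expansion: ARRAY<STRING> yields two entries (parent + child).
val expected = Seq(
  Dimension("list", "array", numberOfChildren = 1),
  Dimension("list.val", "string", numberOfChildren = 0)
)

// Buggy situation (sketch): compaction builds the dimension list with only the
// parent entry, so code that walks the children -- the way assignComplexOrdinal
// walks child dimensions -- indexes into an empty list:
val actual = Seq(Dimension("list", "array", numberOfChildren = 1))
val children = actual.drop(1)
// children(0) would throw java.lang.IndexOutOfBoundsException: Index: 0, Size: 0
println(children.lift(0)) // None -- the child column is missing
{code}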
> There is an error when table has Array<STRING> column and ENABLE_AUTO_LOAD_MERGE is true
> -----------------------------------------------------------------------------------------
>
>                 Key: CARBONDATA-1551
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-1551
>             Project: CarbonData
>          Issue Type: Bug
>          Components: data-load
>    Affects Versions: 1.1.1
>            Reporter: xubo245
>
> There is an error when a table has an Array<STRING> column and ENABLE_AUTO_LOAD_MERGE is true:
> {code:java}
> // unfinished
> sql("drop table if exists array_table")
> CarbonProperties.getInstance().addProperty(CarbonCommonConstants.ENABLE_AUTO_LOAD_MERGE, "true")
> sql(
>   s"""
>      | CREATE TABLE array_table(
>      | complexData ARRAY<STRING>
>      | )
>      | STORED BY 'carbondata'
>      | TBLPROPERTIES('sort_columns'='')
>    """.stripMargin)
> val storeLocation = s"$rootPath/integration/spark-common-test/src/test/resources/bool/ArrayString.csv"
> for (i <- 0 until 4) {
>   sql(
>     s"""
>        | LOAD DATA LOCAL INPATH '${storeLocation}'
>        | INTO TABLE array_table
>        | options('FILEHEADER'='complexData')
>      """.stripMargin)
> }
> checkAnswer(
>   sql("select count(*) from array_table"),
>   Seq(Row(40))
> )
> val segments = sql("SHOW SEGMENTS FOR TABLE array_table")
> val SegmentSequenceIds = segments.collect().map { each => (each.toSeq) (0) }
> assert(!SegmentSequenceIds.contains("0.1"))
> assert(SegmentSequenceIds.length == 4)
> CarbonProperties.getInstance().addProperty(CarbonCommonConstants.ENABLE_AUTO_LOAD_MERGE,
>   CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE)
> sql("drop table if exists array_table")
> {code}
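Until compaction handles complex columns correctly, one possible workaround (a sketch using only the CarbonProperties calls already shown in the reproduction above) is to leave auto load merge at its default, so that loads into tables with ARRAY<STRING> columns never trigger the broken compaction path:

{code:java}
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties

// Workaround sketch: keep auto load merge at its default ("false") so that
// loading into a table with an ARRAY<STRING> column does not auto-compact.
CarbonProperties.getInstance().addProperty(
  CarbonCommonConstants.ENABLE_AUTO_LOAD_MERGE,
  CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE)
{code}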