[ https://issues.apache.org/jira/browse/CARBONDATA-2914?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Chetan Bhat updated CARBONDATA-2914:
------------------------------------
Description:

Steps :
User creates a table in carbon session Beeline in the 1.4.1 Carbon version, using a dictionary include column as a sort column, and loads data into the table.
User upgrades to the 1.5.0 version and connects to Beeline in a spark session.
User copies the HDFS segment data files of the 1.4.1 version store to another location.
User creates a table using carbon, with options pointing to the path of the copied data files.
User executes select filter queries on the dictionary sort columns. (A SQL sketch of these steps follows the Expected section below.)

Issue :
Select filter queries fail for a table created over the path of a data store loaded in the 1.4.1 version when filtering on the dictionary sort columns.

0: jdbc:hive2://10.18.98.136:23040/default> select * from flow_carbon_test4 where opt_prd_cde ='2889' and txn_dte>='20140101' and txn_dte <= '20160101' limit 1000;
Error: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 10.0 failed 4 times, most recent failure: Lost task 0.3 in stage 10.0 (TID 25, BLR1000014291, executor 1): java.lang.ArrayIndexOutOfBoundsException: 0
at org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage.fillVector(FixedLengthDimensionColumnPage.java:127)
at org.apache.carbondata.core.scan.result.impl.FilterQueryScannedResult.fillColumnarDictionaryBatch(FilterQueryScannedResult.java:102)
at org.apache.carbondata.core.scan.collector.impl.DictionaryBasedVectorResultCollector.fillResultToColumnarBatch(DictionaryBasedVectorResultCollector.java:172)
at org.apache.carbondata.core.scan.collector.impl.RestructureBasedVectorResultCollector.collectResultInColumnarBatch(RestructureBasedVectorResultCollector.java:127)
at org.apache.carbondata.core.scan.processor.DataBlockIterator.processNextBatch(DataBlockIterator.java:245)
at org.apache.carbondata.core.scan.result.iterator.VectorDetailQueryResultIterator.processNextBatch(VectorDetailQueryResultIterator.java:48)
at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.nextBatch(VectorizedCarbonRecordReader.java:324)
at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.nextKeyValue(VectorizedCarbonRecordReader.java:185)
at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:105)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:105)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.scan_nextBatch$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:395)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:234)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:228)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace: (state=,code=0)

0: jdbc:hive2://10.18.98.136:23040/default> select * from flow_carbon_test4 where opp_bk ='1491999999094' and txn_dte>='20140101' and txn_dte <= '20160101' order by amt asc limit 1000;
Error: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 49.0 failed 4 times, most recent failure: Lost task 1.3 in stage 49.0 (TID 119, BLR1000014291, executor 1): java.lang.ArrayIndexOutOfBoundsException: 0
at org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage.fillVector(FixedLengthDimensionColumnPage.java:127)
at org.apache.carbondata.core.scan.result.impl.FilterQueryScannedResult.fillColumnarDictionaryBatch(FilterQueryScannedResult.java:102)
at org.apache.carbondata.core.scan.collector.impl.DictionaryBasedVectorResultCollector.fillResultToColumnarBatch(DictionaryBasedVectorResultCollector.java:172)
at org.apache.carbondata.core.scan.collector.impl.RestructureBasedVectorResultCollector.collectResultInColumnarBatch(RestructureBasedVectorResultCollector.java:127)
at org.apache.carbondata.core.scan.processor.DataBlockIterator.processNextBatch(DataBlockIterator.java:245)
at org.apache.carbondata.core.scan.result.iterator.VectorDetailQueryResultIterator.processNextBatch(VectorDetailQueryResultIterator.java:48)
at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.nextBatch(VectorizedCarbonRecordReader.java:324)
at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.nextKeyValue(VectorizedCarbonRecordReader.java:185)
at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:105)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:105)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.scan_nextBatch$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:395)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.convert.Wrappers$IteratorWrapper.hasNext(Wrappers.scala:30)
at org.spark_project.guava.collect.Ordering.leastOf(Ordering.java:628)
at org.apache.spark.util.collection.Utils$.takeOrdered(Utils.scala:37)
at org.apache.spark.rdd.RDD$$anonfun$takeOrdered$1$$anonfun$30.apply(RDD.scala:1423)
at org.apache.spark.rdd.RDD$$anonfun$takeOrdered$1$$anonfun$30.apply(RDD.scala:1420)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace: (state=,code=0)

0: jdbc:hive2://10.18.98.136:23040/default> select * from flow_carbon_test4 where cus_ac = '6222621350672465397' and txn_bk IN ('00000000000', '00000000001','00000000002') OR own_bk IN ('00000000124','00000000175','00000000034','00000000231','00000000167','00000000182','00000000206') or opp_bk='1491999999107' and (txn_dte>='20140101' and txn_dte<='20140630') limit 1000;
Error: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 189.0 failed 4 times, most recent failure: Lost task 0.3 in stage 189.0 (TID 599, BLR1000014291, executor 1): java.lang.ArrayIndexOutOfBoundsException: 0
at org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage.fillVector(FixedLengthDimensionColumnPage.java:127)
at org.apache.carbondata.core.scan.result.impl.FilterQueryScannedResult.fillColumnarDictionaryBatch(FilterQueryScannedResult.java:102)
at org.apache.carbondata.core.scan.collector.impl.DictionaryBasedVectorResultCollector.fillResultToColumnarBatch(DictionaryBasedVectorResultCollector.java:172)
at org.apache.carbondata.core.scan.collector.impl.RestructureBasedVectorResultCollector.collectResultInColumnarBatch(RestructureBasedVectorResultCollector.java:127)
at org.apache.carbondata.core.scan.processor.DataBlockIterator.processNextBatch(DataBlockIterator.java:245)
at org.apache.carbondata.core.scan.result.iterator.VectorDetailQueryResultIterator.processNextBatch(VectorDetailQueryResultIterator.java:48)
at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.nextBatch(VectorizedCarbonRecordReader.java:324)
at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.nextKeyValue(VectorizedCarbonRecordReader.java:185)
at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:105)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:105)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.scan_nextBatch$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:395)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:234)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:228)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace: (state=,code=0)

Expected :
The select filter queries on the dictionary sort columns should execute successfully for the table created over the path of the data store loaded in the 1.4.1 version.
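For reference, a minimal SQL sketch of the steps above. The schema, TBLPROPERTIES values, and HDFS paths are assumptions for illustration; only the table name and the filter columns come from the failing queries, and the 1.5.0 "USING carbon" datasource syntax is likewise an assumption, not taken from the report.

-- 1.4.1, carbon session Beeline: dictionary include columns used as sort columns
-- (column list and types are hypothetical)
CREATE TABLE flow_carbon_test4 (
  cus_ac STRING, txn_bk STRING, own_bk STRING, opp_bk STRING,
  opt_prd_cde STRING, txn_dte STRING, amt DECIMAL(18,2))
STORED BY 'carbondata'
TBLPROPERTIES ('DICTIONARY_INCLUDE'='opt_prd_cde,opp_bk',
               'SORT_COLUMNS'='opt_prd_cde,opp_bk');
LOAD DATA INPATH 'hdfs://hacluster/tmp/flow_data.csv' INTO TABLE flow_carbon_test4;

-- 1.5.0, spark session Beeline (a different database is assumed): after copying
-- the 1.4.1 segment data files to another HDFS location, create a table over
-- that copied path (the path is hypothetical)
CREATE TABLE flow_carbon_test4 USING carbon
OPTIONS ('path'='hdfs://hacluster/user/test/flow_carbon_test4_copy');

-- Filtering on a dictionary sort column then fails with
-- java.lang.ArrayIndexOutOfBoundsException in FixedLengthDimensionColumnPage.fillVector
SELECT * FROM flow_carbon_test4
WHERE opt_prd_cde = '2889' AND txn_dte >= '20140101' AND txn_dte <= '20160101'
LIMIT 1000;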
> Select filter query fails for table with path of data store loaded in 1.4.1 version for dictionary sort columns
> ---------------------------------------------------------------------------------------------------------------
>
>                 Key: CARBONDATA-2914
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-2914
>             Project: CarbonData
>          Issue Type: Bug
>          Components: data-query
>    Affects Versions: 1.5.0
>         Environment: Spark 2.2
>            Reporter: Chetan Bhat
>            Priority: Minor
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)