Chetan Bhat created CARBONDATA-4129:
--------------------------------------- Summary: Class cast exception when array, struct , binary and string type data tried to be merged Key: CARBONDATA-4129 URL: https://issues.apache.org/jira/browse/CARBONDATA-4129 Project: CarbonData Issue Type: Bug Components: data-query Affects Versions: 2.1.0 Environment: Spark 2.4.5 Reporter: Chetan Bhat *Scenario 1: MERGE command with an insert on a string column using an expression **throws an error**. An insert into a binary column using an expression also throws an error.* drop table if exists A; drop table if exists B; CREATE TABLE A(id Int, name string, description string,address string, note string) stored as carbondata tblproperties('long_string_columns'='description,note','table_blocksize'='1','SORT_SCOPE'='global_sort','table_page_size_inmb'='1'); CREATE TABLE B(id Int, name string, description string,address string, note string) stored as carbondata tblproperties('long_string_columns'='description,note','table_blocksize'='1','SORT_SCOPE'='global_sort','table_page_size_inmb'='1'); insert into A select 1,"name11111A","asasfdfdfdsf","tutyutyuty","6867898980909099-0-0-0878676565454545465768798"; insert into A select 2,"name11112A","asasfdfdfdsf","tutyutyuty","6867898980909099-0-0-0878676565454545465768798"; insert into A select 3,"name11113A","asasfdfdfdsf","tutyutyuty","6867898980909099-0-0-0878676565454545465768798"; insert into A select 4,"name11114A","asasfdfdfdsf","tutyutyuty","6867898980909099-0-0-0878676565454545465768798"; insert into A select 5,"name11115A","asasfdfdfdsf","tutyutyuty","6867898980909099-0-0-0878676565454545465768798"; insert into B select 1,"name11111B","asasfdfdfdsf","tutyutyuty","6867898980909099-0-0-0878676565454545465768798"; insert into B select 2,"name11112B","asasfdfdfdsf","tutyutyuty","6867898980909099-0-0-0878676565454545465768798"; insert into B select 3,"name11113B","asasfdfdfdsf","tutyutyuty","6867898980909099-0-0-0878676565454545465768798"; insert into B select 
6,"name11114B","asasfdfdfdsf","tutyutyuty","6867898980909099-0-0-0878676565454545465768798"; insert into B select 7,"name11115B","asasfdfdfdsf","tutyutyuty","6867898980909099-0-0-0878676565454545465768798"; MERGE INTO A USING B ON A.ID=B.ID WHEN NOT MATCHED AND B.ID=7 THEN INSERT (A.ID,A.name,A.description ,A.address, A.note) VALUES (B.ID,B.name+'10',B.description ,B.address,'test-string'); 0: jdbc:hive2://linux-63:22550/> MERGE INTO A USING B ON A.ID=B.ID WHEN NOT MATCHED AND B.ID=7 THEN INSERT (A.ID,A.name,A.description ,A.address, A.note) VALUES (B.ID,B.name+'10',B.description ,B.address,'test-string'); Error: org.apache.spark.SparkException: Job aborted due to stage failure: Task 4 in stage 3813.0 failed 4 times, most recent failure: Lost task 4.3 in stage 3813.0 (TID 23528, linux-63, executor 5): java.lang.ClassCastException: org.apache.spark.sql.types.StringType$ cannot be cast to org.apache.spark.sql.types.NumericType at org.apache.spark.sql.catalyst.util.TypeUtils$.getNumeric(TypeUtils.scala:58) at org.apache.spark.sql.catalyst.expressions.Add.numeric$lzycompute(arithmetic.scala:166) at org.apache.spark.sql.catalyst.expressions.Add.numeric(arithmetic.scala:166) at org.apache.spark.sql.catalyst.expressions.Add.nullSafeEval(arithmetic.scala:172) at org.apache.spark.sql.catalyst.expressions.BinaryExpression.eval(Expression.scala:486) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:92) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:66) at org.apache.spark.sql.execution.command.mutation.merge.MergeProjection.apply(MergeProjection.scala:54) at org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetCommand$$anonfun$processIUD$1$$anon$1$$anonfun$next$1.apply(CarbonMergeDataSetCommand.scala:341) at 
org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetCommand$$anonfun$processIUD$1$$anon$1$$anonfun$next$1.apply(CarbonMergeDataSetCommand.scala:338) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetCommand$$anonfun$processIUD$1$$anon$1.next(CarbonMergeDataSetCommand.scala:338) at org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetCommand$$anonfun$processIUD$1$$anon$1.next(CarbonMergeDataSetCommand.scala:319) at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:463) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) at org.apache.spark.sql.execution.columnar.CachedRDDBuilder$$anonfun$1$$anon$1.hasNext(InMemoryRelation.scala:125) at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221) at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:299) at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1326) at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1317) at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1252) at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1317) at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1043) at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357) at org.apache.spark.rdd.RDD.iterator(RDD.scala:308) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346) at org.apache.spark.rdd.RDD.iterator(RDD.scala:310) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346) at org.apache.spark.rdd.RDD.iterator(RDD.scala:310) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346) at org.apache.spark.rdd.RDD.iterator(RDD.scala:310) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346) at org.apache.spark.rdd.RDD.iterator(RDD.scala:310) at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55) at org.apache.spark.scheduler.Task.run(Task.scala:123) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:413) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1551) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:419) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Driver stacktrace: (state=,code=0) 0: jdbc:hive2://linux-63:22550/> *Scenario 2: MERGE with a struct complex type **throws an error.*** drop table if exists A; drop table if exists B; create table A (CUST_ID string, YEAR int, MONTH int, AGE int, GENDER string, EDUCATED string, IS_MARRIED string, STRUCT_INT_DOUBLE_STRING_DATE struct<ID:int,SALARY:double,COUNTRY:STRING,CHECK_DATE:timestamp>,CARD_COUNT int,DEBIT_COUNT int, CREDIT_COUNT int, DEPOSIT double, HQ_DEPOSIT double) stored as carbondata ; LOAD DATA INPATH 'hdfs://hacluster/chetan/Struct.csv' INTO table A options ('DELIMITER'=',', 'QUOTECHAR'='"', 'FILEHEADER'='CUST_ID,YEAR,MONTH,AGE,GENDER,EDUCATED,IS_MARRIED,STRUCT_INT_DOUBLE_STRING_DATE,CARD_COUNT,DEBIT_COUNT,CREDIT_COUNT,DEPOSIT,HQ_DEPOSIT','COMPLEX_DELIMITER_LEVEL_1'='$'); create table B (CUST_ID string, YEAR int, MONTH int, AGE int, GENDER string, EDUCATED string, IS_MARRIED string, STRUCT_INT_DOUBLE_STRING_DATE 
struct<ID:int,SALARY:double,COUNTRY:STRING,CHECK_DATE:timestamp>,CARD_COUNT int,DEBIT_COUNT int, CREDIT_COUNT int, DEPOSIT double, HQ_DEPOSIT double) stored as carbondata ; LOAD DATA INPATH 'hdfs://hacluster/chetan/Struct.csv' INTO table B options ('DELIMITER'=',', 'QUOTECHAR'='"', 'FILEHEADER'='CUST_ID,YEAR,MONTH,AGE,GENDER,EDUCATED,IS_MARRIED,STRUCT_INT_DOUBLE_STRING_DATE,CARD_COUNT,DEBIT_COUNT,CREDIT_COUNT,DEPOSIT,HQ_DEPOSIT','COMPLEX_DELIMITER_LEVEL_1'='$'); MERGE INTO A USING B ON A.YEAR=B.YEAR WHEN MATCHED AND A.YEAR=2015 THEN DELETE WHEN MATCHED AND A.YEAR=2015 THEN UPDATE SET *; 0: jdbc:hive2://linux-63:22550/> MERGE INTO A USING B ON A.YEAR=B.YEAR WHEN MATCHED AND A.YEAR=2015 THEN DELETE WHEN MATCHED AND A.YEAR=2015 THEN UPDATE SET *; Error: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 3818.0 failed 4 times, most recent failure: Lost task 0.3 in stage 3818.0 (TID 23535, linux-65, executor 3): java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema cannot be cast to org.apache.spark.sql.catalyst.InternalRow at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow$class.getStruct(rows.scala:51) at org.apache.spark.sql.catalyst.expressions.GenericInternalRow.getStruct(rows.scala:195) at org.apache.spark.sql.catalyst.InternalRow$$anonfun$getAccessor$12.apply(InternalRow.scala:140) at org.apache.spark.sql.catalyst.InternalRow$$anonfun$getAccessor$12.apply(InternalRow.scala:140) at org.apache.spark.sql.catalyst.expressions.BoundReference.eval(BoundAttribute.scala:44) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:92) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:66) at org.apache.spark.sql.execution.command.mutation.merge.MergeProjection.apply(MergeProjection.scala:54) at 
org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetCommand$$anonfun$processIUD$1$$anon$1$$anonfun$next$1.apply(CarbonMergeDataSetCommand.scala:341) at org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetCommand$$anonfun$processIUD$1$$anon$1$$anonfun$next$1.apply(CarbonMergeDataSetCommand.scala:338) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetCommand$$anonfun$processIUD$1$$anon$1.next(CarbonMergeDataSetCommand.scala:338) at org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetCommand$$anonfun$processIUD$1$$anon$1.next(CarbonMergeDataSetCommand.scala:319) at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:463) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) at org.apache.spark.sql.execution.columnar.CachedRDDBuilder$$anonfun$1$$anon$1.hasNext(InMemoryRelation.scala:125) at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221) at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:299) at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1326) at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1317) at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1252) at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1317) at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1043) at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357) at org.apache.spark.rdd.RDD.iterator(RDD.scala:308) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346) at org.apache.spark.rdd.RDD.iterator(RDD.scala:310) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346) at org.apache.spark.rdd.RDD.iterator(RDD.scala:310) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346) at org.apache.spark.rdd.RDD.iterator(RDD.scala:310) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346) at org.apache.spark.rdd.RDD.iterator(RDD.scala:310) at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55) at org.apache.spark.scheduler.Task.run(Task.scala:123) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:413) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1551) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:419) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Driver stacktrace: (state=,code=0) 0: jdbc:hive2://linux-63:22550/> *Scenario 3: MERGE with an array complex type **throws an error.*** drop table if exists A; drop table if exists B; create table A (CUST_ID string, YEAR int, MONTH int, AGE int, GENDER string, EDUCATED string, IS_MARRIED string, ARRAY_INT array<int>,ARRAY_STRING array<string>,ARRAY_DATE array<timestamp>,CARD_COUNT int,DEBIT_COUNT int, CREDIT_COUNT int, DEPOSIT double, HQ_DEPOSIT double) stored as carbondata; LOAD DATA INPATH 'hdfs://hacluster/chetan/Array.csv' INTO table A options ('DELIMITER'=',', 'QUOTECHAR'='"', 
'FILEHEADER'='CUST_ID,YEAR,MONTH,AGE,GENDER,EDUCATED,IS_MARRIED,ARRAY_INT,ARRAY_STRING,ARRAY_DATE,CARD_COUNT,DEBIT_COUNT,CREDIT_COUNT,DEPOSIT,HQ_DEPOSIT','COMPLEX_DELIMITER_LEVEL_1'='$'); create table B (CUST_ID string, YEAR int, MONTH int, AGE int, GENDER string, EDUCATED string, IS_MARRIED string, ARRAY_INT array<int>,ARRAY_STRING array<string>,ARRAY_DATE array<timestamp>,CARD_COUNT int,DEBIT_COUNT int, CREDIT_COUNT int, DEPOSIT double, HQ_DEPOSIT double) stored as carbondata; LOAD DATA INPATH 'hdfs://hacluster/chetan/Array.csv' INTO table B options ('DELIMITER'=',', 'QUOTECHAR'='"', 'FILEHEADER'='CUST_ID,YEAR,MONTH,AGE,GENDER,EDUCATED,IS_MARRIED,ARRAY_INT,ARRAY_STRING,ARRAY_DATE,CARD_COUNT,DEBIT_COUNT,CREDIT_COUNT,DEPOSIT,HQ_DEPOSIT','COMPLEX_DELIMITER_LEVEL_1'='$'); MERGE INTO A USING B ON A.YEAR=B.YEAR WHEN MATCHED AND A.YEAR=2015 THEN DELETE WHEN MATCHED AND A.YEAR=2015 THEN UPDATE SET *; 0: jdbc:hive2://linux-63:22550/> MERGE INTO A USING B ON A.YEAR=B.YEAR WHEN MATCHED AND A.YEAR=2015 THEN DELETE WHEN MATCHED AND A.YEAR=2015 THEN UPDATE SET *; Error: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 3823.0 failed 4 times, most recent failure: Lost task 0.3 in stage 3823.0 (TID 23542, linux-65, executor 3): java.lang.ClassCastException: scala.collection.mutable.WrappedArray$ofRef cannot be cast to org.apache.spark.sql.catalyst.util.ArrayData at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow$class.getArray(rows.scala:48) at org.apache.spark.sql.catalyst.expressions.GenericInternalRow.getArray(rows.scala:195) at org.apache.spark.sql.catalyst.InternalRow$$anonfun$getAccessor$13.apply(InternalRow.scala:141) at org.apache.spark.sql.catalyst.InternalRow$$anonfun$getAccessor$13.apply(InternalRow.scala:141) at org.apache.spark.sql.catalyst.expressions.BoundReference.eval(BoundAttribute.scala:44) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:92) at 
org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:66) at org.apache.spark.sql.execution.command.mutation.merge.MergeProjection.apply(MergeProjection.scala:54) at org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetCommand$$anonfun$processIUD$1$$anon$1$$anonfun$next$1.apply(CarbonMergeDataSetCommand.scala:341) at org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetCommand$$anonfun$processIUD$1$$anon$1$$anonfun$next$1.apply(CarbonMergeDataSetCommand.scala:338) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetCommand$$anonfun$processIUD$1$$anon$1.next(CarbonMergeDataSetCommand.scala:338) at org.apache.spark.sql.execution.command.mutation.merge.CarbonMergeDataSetCommand$$anonfun$processIUD$1$$anon$1.next(CarbonMergeDataSetCommand.scala:319) at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:463) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) at org.apache.spark.sql.execution.columnar.CachedRDDBuilder$$anonfun$1$$anon$1.hasNext(InMemoryRelation.scala:125) at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221) at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:299) at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1326) at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1317) at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1252) at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1317) at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1043) at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357) at org.apache.spark.rdd.RDD.iterator(RDD.scala:308) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346) at org.apache.spark.rdd.RDD.iterator(RDD.scala:310) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346) at org.apache.spark.rdd.RDD.iterator(RDD.scala:310) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346) at org.apache.spark.rdd.RDD.iterator(RDD.scala:310) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346) at org.apache.spark.rdd.RDD.iterator(RDD.scala:310) at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55) at org.apache.spark.scheduler.Task.run(Task.scala:123) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:413) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1551) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:419) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Driver stacktrace: (state=,code=0) 0: jdbc:hive2://linux-63:22550/> -- This message was sent by Atlassian Jira (v8.3.4#803005) |
Free forum by Nabble | Edit this page |