[ https://issues.apache.org/jira/browse/CARBONDATA-3639?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ajantha Bhat updated CARBONDATA-3639: ------------------------------------- Description: h6. Global Sort CSV loading flow with Binary non-sort columns throws exception Previous exception in task: Dataload failed, String length cannot exceed 32000 charactersPrevious exception in task: Dataload failed, String length cannot exceed 32000 characters org.apache.carbondata.streaming.parser.FieldConverter$.objectToString(FieldConverter.scala:53) org.apache.carbondata.spark.util.CarbonScalaUtil$.getString(CarbonScalaUtil.scala:71) org.apache.carbondata.spark.rdd.NewRddIterator$$anonfun$next$1.apply$mcVI$sp(NewCarbonDataLoadRDD.scala:358) scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) org.apache.carbondata.spark.rdd.NewRddIterator.next(NewCarbonDataLoadRDD.scala:357) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:66) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:61) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:92) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:83) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:253) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:248) scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:462) scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) org.apache.spark.scheduler.Task.run(Task.scala:109) org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) java.lang.Thread.run(Thread.java:748) at org.apache.spark.TaskContextImpl.invokeListeners(TaskContextImpl.scala:139) at org.apache.spark.TaskContextImpl.markTaskFailed(TaskContextImpl.scala:107) at org.apache.spark.scheduler.Task.run(Task.scala:114) ... 4 more h6. was: h6. Global Sort CSV loading flow with Binary non-sort columns throws exception h6. Previous exception in task: Dataload failed, String length cannot exceed 32000 charactersPrevious exception in task: Dataload failed, String length cannot exceed 32000 characters org.apache.carbondata.streaming.parser.FieldConverter$.objectToString(FieldConverter.scala:53) org.apache.carbondata.spark.util.CarbonScalaUtil$.getString(CarbonScalaUtil.scala:71) org.apache.carbondata.spark.rdd.NewRddIterator$$anonfun$next$1.apply$mcVI$sp(NewCarbonDataLoadRDD.scala:358) scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) org.apache.carbondata.spark.rdd.NewRddIterator.next(NewCarbonDataLoadRDD.scala:357) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:66) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:61) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:92) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:83) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:253) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:248) scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:462) scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) org.apache.spark.scheduler.Task.run(Task.scala:109) org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) java.lang.Thread.run(Thread.java:748) at org.apache.spark.TaskContextImpl.invokeListeners(TaskContextImpl.scala:139) at org.apache.spark.TaskContextImpl.markTaskFailed(TaskContextImpl.scala:107) at org.apache.spark.scheduler.Task.run(Task.scala:114) ... 4 more2019-12-28 17:37:47 ERROR TaskSetManager:70 - Task 0 in stage 0.0 failed 1 times; aborting job2019-12-28 17:37:47 ERROR CarbonDataRDDFactory$:429 - org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): java.lang.Exception: Dataload failed, String length cannot exceed 32000 characters at org.apache.carbondata.streaming.parser.FieldConverter$.objectToString(FieldConverter.scala:53) at org.apache.carbondata.spark.util.CarbonScalaUtil$.getString(CarbonScalaUtil.scala:71) at org.apache.carbondata.spark.rdd.NewRddIterator$$anonfun$next$1.apply$mcVI$sp(NewCarbonDataLoadRDD.scala:358) at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) at org.apache.carbondata.spark.rdd.NewRddIterator.next(NewCarbonDataLoadRDD.scala:357) at org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:66) at org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:61) at org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:92) at org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:83) at org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:253) at org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:248) at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:462) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) at org.apache.spark.scheduler.Task.run(Task.scala:109) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Suppressed: org.apache.spark.util.TaskCompletionListenerException: Exception 0: Data Loading failed for table binarytableException 1: Data Loading failed for table binarytable Previous exception in task: Dataload failed, String length cannot exceed 32000 characters org.apache.carbondata.streaming.parser.FieldConverter$.objectToString(FieldConverter.scala:53) org.apache.carbondata.spark.util.CarbonScalaUtil$.getString(CarbonScalaUtil.scala:71) org.apache.carbondata.spark.rdd.NewRddIterator$$anonfun$next$1.apply$mcVI$sp(NewCarbonDataLoadRDD.scala:358) scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) org.apache.carbondata.spark.rdd.NewRddIterator.next(NewCarbonDataLoadRDD.scala:357) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:66) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:61) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:92) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:83) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:253) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:248) scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:462) scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) org.apache.spark.scheduler.Task.run(Task.scala:109) org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) java.lang.Thread.run(Thread.java:748) at org.apache.spark.TaskContextImpl.invokeListeners(TaskContextImpl.scala:139) at org.apache.spark.TaskContextImpl.markTaskFailed(TaskContextImpl.scala:107) at org.apache.spark.scheduler.Task.run(Task.scala:114) ... 4 more h6. > Global Sort CSV loading flow with Binary non-sort columns throws exception > -------------------------------------------------------------------------- > > Key: CARBONDATA-3639 > URL: https://issues.apache.org/jira/browse/CARBONDATA-3639 > Project: CarbonData > Issue Type: Bug > Reporter: Ajantha Bhat > Assignee: Ajantha Bhat > Priority: Major > > h6. Global Sort CSV loading flow with Binary non-sort columns throws exception > Previous exception in task: Dataload failed, String length cannot exceed 32000 charactersPrevious exception in task: Dataload failed, String length cannot exceed 32000 characters org.apache.carbondata.streaming.parser.FieldConverter$.objectToString(FieldConverter.scala:53) org.apache.carbondata.spark.util.CarbonScalaUtil$.getString(CarbonScalaUtil.scala:71) org.apache.carbondata.spark.rdd.NewRddIterator$$anonfun$next$1.apply$mcVI$sp(NewCarbonDataLoadRDD.scala:358) scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160) org.apache.carbondata.spark.rdd.NewRddIterator.next(NewCarbonDataLoadRDD.scala:357) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:66) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$1.next(DataLoadProcessorStepOnSpark.scala:61) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:92) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$2.next(DataLoadProcessorStepOnSpark.scala:83) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:253) org.apache.carbondata.spark.load.DataLoadProcessorStepOnSpark$$anon$6.next(DataLoadProcessorStepOnSpark.scala:248) scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:462) scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) org.apache.spark.scheduler.Task.run(Task.scala:109) org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) java.lang.Thread.run(Thread.java:748) at org.apache.spark.TaskContextImpl.invokeListeners(TaskContextImpl.scala:139) at org.apache.spark.TaskContextImpl.markTaskFailed(TaskContextImpl.scala:107) at org.apache.spark.scheduler.Task.run(Task.scala:114) ... 4 more > h6. -- This message was sent by Atlassian Jira (v8.3.4#803005) |
Free forum by Nabble | Edit this page |