[ https://issues.apache.org/jira/browse/CARBONDATA-585?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Pallavi Singh updated CARBONDATA-585:
-------------------------------------
    Environment: Cluster

    Description:
On execution of the following query:

LOAD DATA inpath 'hdfs://localhost:54310/csv/test.csv' INTO table employee options('DELIMITER'=',', 'FILEHEADER'='id, firstname');

the table schema is as follows:

+-----------+------------+----------+--+
| col_name  | data_type  | comment  |
+-----------+------------+----------+--+
| id        | bigint     |          |
| firstname | string     |          |
+-----------+------------+----------+--+

The load succeeds at times, but we often end up with the following error:

Dictionary file is locked for Updation.

Below are the logs:

AUDIT 02-01 18:17:07,009 - [knoldus][pallavi][Thread-110]Dataload failure for default.employee. Please check the logs
INFO 02-01 18:17:07,020 - pool-30-thread-1 Successfully deleted the lock file /tmp/default/employee/meta.lock
INFO 02-01 18:17:07,022 - Table MetaData Unlocked Successfully after data load
ERROR 02-01 18:17:07,022 - Error executing query, currentState RUNNING,
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2.0 failed 4 times, most recent failure: Lost task 0.3 in stage 2.0 (TID 5, 192.168.2.188): java.lang.RuntimeException: Dictionary file firstname is locked for updation. Please try after some time
    at scala.sys.package$.error(package.scala:27)
    at org.apache.carbondata.spark.rdd.CarbonGlobalDictionaryGenerateRDD$$anon$1.<init>(CarbonGlobalDictionaryRDD.scala:364)
    at org.apache.carbondata.spark.rdd.CarbonGlobalDictionaryGenerateRDD.compute(CarbonGlobalDictionaryRDD.scala:302)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:89)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:927)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:926)
    at org.apache.carbondata.spark.util.GlobalDictionaryUtil$.generateGlobalDictionary(GlobalDictionaryUtil.scala:769)
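For anyone trying to reproduce this, the sequence below matches the schema shown above. The CREATE TABLE statement is an assumed reconstruction (the report only shows the resulting DESCRIBE output); the HDFS path and column names are the reporter's. Note that the error names the string column firstname, which is the column for which global dictionary generation takes the lock.

    -- Assumed reconstruction: the report shows only the resulting schema,
    -- not the original CREATE TABLE statement.
    CREATE TABLE employee (id BIGINT, firstname STRING)
    STORED BY 'carbondata';

    -- The load from the report. Two of these running concurrently against
    -- the same table is one way the per-column dictionary lock can collide.
    LOAD DATA INPATH 'hdfs://localhost:54310/csv/test.csv'
    INTO TABLE employee
    OPTIONS('DELIMITER'=',', 'FILEHEADER'='id, firstname');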
    Component/s: data-load


> Dictionary file is locked for Updation
> --------------------------------------
>
>                 Key: CARBONDATA-585
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-585
>             Project: CarbonData
>          Issue Type: Bug
>          Components: data-load
>    Affects Versions: 1.0.0-incubating
>         Environment: Cluster
>            Reporter: Pallavi Singh
>
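One detail from the logs worth noting: the lock file sits under /tmp (/tmp/default/employee/meta.lock) even though the environment is a cluster, so a lock taken on one node is not visible to executors on other nodes, and a retried task may also hit a stale lock left by a failed attempt. CarbonData selects its lock implementation via the carbon.lock.type property in carbon.properties; the snippet below is only a sketch of that setting under this assumption, not a confirmed fix for this issue.

    # carbon.properties -- sketch only. carbon.lock.type selects the lock
    # implementation used during load: LOCALLOCK writes lock files to the
    # local filesystem, HDFSLOCK keeps them on HDFS so all nodes see them.
    carbon.lock.type=HDFSLOCK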
-- This message was sent by Atlassian JIRA (v6.3.4#6332)