Hi
Have you solved this issue after applying new configurations?
Regards
Lianggeda wrote: hello,
I tested the data in Spark local mode, then ran LOAD DATA INPATH into the table, and it works well.
But when I use yarn-client mode with 10,000 rows (size: 940 KB), an error happened. There is no lock file to be found in the tmp dir, and I don't know how to debug this. Please help — thanks.
spark1.6 hadoop 2.7|2.6 carbondata 0.2
local mode: run ok
$SPARK_HOME/bin/spark-shell --master local[4] --jars /usr/local/spark/lib/carbondata_2.10-0.2.0-incubating-shade-hadoop2.7.1.jar
yarn command: fails
$SPARK_HOME/bin/spark-shell --verbose --master yarn-client --driver-memory 1G --driver-cores 1 --executor-memory 4G --num-executors 5 --executor-cores 1 --conf "spark.executor.extraJavaOptions=-XX:NewRatio=2 -XX:PermSize=512m -XX:MaxPermSize=512m -XX:SurvivorRatio=6 -verbose:gc -XX:-PrintGCDetails -XX:+PrintGCTimeStamps" --conf "spark.driver.extraJavaOptions=-XX:MaxPermSize=512m -XX:PermSize=512m" --conf spark.yarn.driver.memoryOverhead=1024 --conf spark.yarn.executor.memoryOverhead=3096 --jars /usr/local/spark/lib/carbondata_2.10-0.2.0-incubating-shade-hadoop2.7.1.jar
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.spark.sql.CarbonContext
val storePath = "hdfs://test:8020/usr/carbondata/store"
val cc = new CarbonContext(sc, storePath)
cc.setConf(HiveConf.ConfVars.HIVECHECKFILEFORMAT.varname, "false")
cc.setConf("carbon.kettle.home","/usr/local/spark/ carbondata/carbonplugins")
cc.sql("CREATE TABLE `LINEORDER3` ( LO_ORDERKEY bigint, LO_LINENUMBER int, LO_CUSTKEY bigint, LO_PARTKEY bigint, LO_SUPPKEY bigint, LO_ORDERDATE int, LO_ORDERPRIOTITY string, LO_SHIPPRIOTITY int, LO_QUANTITY int, LO_EXTENDEDPRICE int, LO_ORDTOTALPRICE int, LO_DISCOUNT int, LO_REVENUE int, LO_SUPPLYCOST int, LO_TAX int, LO_COMMITDATE int, LO_SHIPMODE string ) STORED BY 'carbondata'")
cc.sql(s"load data local inpath 'hdfs://test:8020/tmp/lineorder_1w.tbl' into table lineorder3 options('DELIMITER'='|', 'FILEHEADER'='LO_ORDERKEY, LO_LINENUMBER, LO_CUSTKEY, LO_PARTKEY , LO_SUPPKEY , LO_ORDERDATE , LO_ORDERPRIOTITY , LO_SHIPPRIOTITY , LO_QUANTITY ,LO_EXTENDEDPRICE , LO_ORDTOTALPRICE ,LO_DISCOUNT , LO_REVENUE , LO_SUPPLYCOST, LO_TAX, LO_COMMITDATE, LO_SHIPMODE')")
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2.0 failed 4 times, most recent failure: Lost task 0.3 in stage 2.0 (TID 8, datanode03-bi-dev): java.lang.RuntimeException: Dictionary file lo_orderpriotity is locked for updation. Please try after some time
at scala.sys.package$.error(package.scala:27)
at org.apache.carbondata.spark.rdd. CarbonGlobalDictionaryGenerate RDD$$anon$1.<init>( CarbonGlobalDictionaryRDD. scala:353)
at org.apache.carbondata.spark.rdd. CarbonGlobalDictionaryGenerate RDD.compute( CarbonGlobalDictionaryRDD. scala:293)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD. scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask. scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run( Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker( ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run( ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org $apache$spark$scheduler$DAGScheduler$$ failJobAndIndependentStages( DAGScheduler.scala:1431)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$ abortStage$1.apply( DAGScheduler.scala:1419)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$ abortStage$1.apply( DAGScheduler.scala:1418)
at scala.collection.mutable.ResizableArray$class.foreach( ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach( ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage( DAGScheduler.scala:1418)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$ handleTaskSetFailed$1.apply( DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$ handleTaskSetFailed$1.apply( DAGScheduler.scala:799)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler. handleTaskSetFailed( DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop. doOnReceive(DAGScheduler. scala:1640)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop. onReceive(DAGScheduler.scala: 1599)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop. onReceive(DAGScheduler.scala: 1588)
at org.apache.spark.util.EventLoop$$anon$1.run( EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob( DAGScheduler.scala:620)
at org.apache.spark.SparkContext.runJob(SparkContext.scala: 1832)
at org.apache.spark.SparkContext.runJob(SparkContext.scala: 1845)
at org.apache.spark.SparkContext.runJob(SparkContext.scala: 1858)
at org.apache.spark.SparkContext.runJob(SparkContext.scala: 1929)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD. scala:927)
at org.apache.spark.rdd.RDDOperationScope$.withScope( RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope( RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.RDD.collect(RDD.scala:926)
at org.apache.carbondata.spark.util.GlobalDictionaryUtil$. generateGlobalDictionary( GlobalDictionaryUtil.scala: 800)
at org.apache.spark.sql.execution.command. LoadTableUsingKettle.run( carbonTableSchema.scala:1197)
at org.apache.spark.sql.execution.command.LoadTable. run(carbonTableSchema.scala: 1036)
at org.apache.spark.sql.execution.ExecutedCommand. sideEffectResult$lzycompute( commands.scala:58)
at org.apache.spark.sql.execution.ExecutedCommand. sideEffectResult(commands. scala:56)
at org.apache.spark.sql.execution.ExecutedCommand. doExecute(commands.scala:70)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$ execute$5.apply(SparkPlan. scala:132)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$ execute$5.apply(SparkPlan. scala:130)
at org.apache.spark.rdd.RDDOperationScope$.withScope( RDDOperationScope.scala:150)
at org.apache.spark.sql.execution.SparkPlan.execute( SparkPlan.scala:130)
at org.apache.spark.sql.execution.QueryExecution. toRdd$lzycompute( QueryExecution.scala:55)
at org.apache.spark.sql.execution.QueryExecution. toRdd(QueryExecution.scala:55)
at org.apache.spark.sql.DataFrame.<init>(DataFrame. scala:145)
at org.apache.spark.sql.DataFrame.<init>(DataFrame. scala:130)
at org.apache.carbondata.spark.rdd.CarbonDataFrameRDD.<init>( CarbonDataFrameRDD.scala:23)
at org.apache.spark.sql.CarbonContext.sql( CarbonContext.scala:137)
at $line32.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$ iwC.<init>(<console>:37)
at $line32.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.< init>(<console>:42)
at $line32.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init> (<console>:44)
at $line32.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(< console>:46)
at $line32.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console> :48)
at $line32.$read$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:50)
at $line32.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:52)
at $line32.$read$$iwC$$iwC$$iwC.<init>(<console>:54)
at $line32.$read$$iwC$$iwC.<init>(<console>:56)
at $line32.$read$$iwC.<init>(<console>:58)
at $line32.$read.<init>(<console>:60)
at $line32.$read$.<init>(<console>:64)
at $line32.$read$.<clinit>(<console>)
at $line32.$eval$.<init>(<console>:7)
at $line32.$eval$.<clinit>(<console>)
at $line32.$eval.$print(<console>)
at sun.reflect.NativeMethodAccessorImpl. invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl. invoke( NativeMethodAccessorImpl.java: 57)
at sun.reflect.DelegatingMethodAccessorImpl. invoke( DelegatingMethodAccessorImpl. java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call( SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun( SparkIMain.scala:1346)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1( SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret( SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret( SparkIMain.scala:819)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1( SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop. interpretStartingWith( SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop. scala:814)
at org.apache.spark.repl.SparkILoop.processLine$1( SparkILoop.scala:657)
at org.apache.spark.repl.SparkILoop.innerLoop$1( SparkILoop.scala:665)
at org.apache.spark.repl.SparkILoop.org $apache$spark$repl$SparkILoop$$loop( SparkILoop.scala:670)
at org.apache.spark.repl.SparkILoop$$anonfun$org$ apache$spark$repl$SparkILoop$$ process$1.apply$mcZ$sp( SparkILoop.scala:997)
at org.apache.spark.repl.SparkILoop$$anonfun$org$ apache$spark$repl$SparkILoop$$ process$1.apply(SparkILoop. scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$ apache$spark$repl$SparkILoop$$ process$1.apply(SparkILoop. scala:945)
at scala.tools.nsc.util.ScalaClassLoader$. savingContextLoader( ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org $apache$spark$repl$SparkILoop$$process( SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop. scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl. invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl. invoke( NativeMethodAccessorImpl.java: 57)
at sun.reflect.DelegatingMethodAccessorImpl. invoke( DelegatingMethodAccessorImpl. java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$ deploy$SparkSubmit$$runMain( SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1( SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit( SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit. scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit. scala)
Caused by: java.lang.RuntimeException: Dictionary file lo_orderpriotity is locked for updation. Please try after some time
at scala.sys.package$.error(package.scala:27)
If you reply to this email, your message will be added to the discussion below: http://apache-carbondata-mailing-list-archive.1130556.n5.nabble.com/carbondata-0-2-load-data-failed-in-yarn-molde-tp3908p3973.html
Free forum by Nabble | Edit this page |