Hi dev,
The latest release, apache-carbondata-1.0.0-incubating-rc2, built against Spark-1.6.2, throws the exception `java.lang.ClassNotFoundException: org.apache.carbondata.spark.rdd.CarbonBlockDistinctValuesCombineRDD` when I load data following the Quick Start Guide.

Env:
a. CarbonData-1.0.0-incubating-rc2
b. Spark-1.6.2
c. Hadoop-2.7.1
d. CarbonData on a "Spark on YARN" cluster, running in yarn-client mode.

Any suggestions? Thank you.

The exception stack trace is as below:

--------
ERROR 14-02 12:21:02,005 - main generate global dictionary failed
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, nodemanger): java.lang.ClassNotFoundException: org.apache.carbondata.spark.rdd.CarbonBlockDistinctValuesCombineRDD
    at org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:84)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:274)
    at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:68)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1612)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1517)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1771)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:64)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
    at org.apache.spark.scheduler.Task.run(Task.scala:89)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:927)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:926)
    at org.apache.carbondata.spark.util.GlobalDictionaryUtil$.generateGlobalDictionary(GlobalDictionaryUtil.scala:742)
    at org.apache.spark.sql.execution.command.LoadTable.run(carbonTableSchema.scala:577)
    at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
    at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
    at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55)
    at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:145)
    at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:130)
    at org.apache.spark.sql.CarbonContext.sql(CarbonContext.scala:139)
    at $line22.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)
    at $line22.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:38)
    at $line22.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40)
    at $line22.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42)
    at $line22.$read$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:44)
    at $line22.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:46)
    at $line22.$read$$iwC$$iwC$$iwC.<init>(<console>:48)
    at $line22.$read$$iwC$$iwC.<init>(<console>:50)
    at $line22.$read$$iwC.<init>(<console>:52)
    at $line22.$read.<init>(<console>:54)
    at $line22.$read$.<init>(<console>:58)
    at $line22.$read$.<clinit>(<console>)
    at $line22.$eval$.<init>(<console>:7)
    at $line22.$eval$.<clinit>(<console>)
    at $line22.$eval.$print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
    at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
    at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
    at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
    at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
    at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
    at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
    at org.apache.spark.repl.Main$.main(Main.scala:31)
    at org.apache.spark.repl.Main.main(Main.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:735)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException: org.apache.carbondata.spark.rdd.CarbonBlockDistinctValuesCombineRDD
    at org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:84)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:274)
    at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:68)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1612)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1517)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1771)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:64)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
    at org.apache.spark.scheduler.Task.run(Task.scala:89)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
Hi,
I think the carbon jar is compiled properly. Can you use any decompiler to open carbondata-spark-common-1.1.0-incubating-SNAPSHOT.jar (in the spark-common module's target folder) and check whether the required class org.apache.carbondata.spark.rdd.CarbonBlockDistinctValuesCombineRDD is present or not?

If you are using only the assembly jar, then decompile and check the assembly jar.

Regards
Manish Gupta
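A lighter-weight way to run the same check is simply listing the jar entries instead of decompiling. The paths below assume the module and assembly jar names mentioned in this thread and may need adjusting to the actual build layout:

    # List the spark-common module jar and look for the missing class
    jar tf spark-common/target/carbondata-spark-common-1.1.0-incubating-SNAPSHOT.jar \
      | grep CarbonBlockDistinctValuesCombineRDD

    # Or inspect the assembly jar that is actually shipped to Spark
    unzip -l carbonlib/carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar \
      | grep 'org/apache/carbondata/spark/rdd/CarbonBlockDistinctValuesCombineRDD'

If the class entry shows up, the build is fine and the problem is more likely how the jar is (or is not) distributed to the executors.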
Hi Manish Gupta,
Thanks for your attention. I am actually trying to load data following
https://github.com/apache/incubator-carbondata/blob/master/docs/quick-start-guide.md
after deploying carbondata-1.0.0.

1. When I run CarbonData via `bin/spark-shell`, it throws the exception above.
2. When I run CarbonData via `bin/spark-shell --jars carbonlib/carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar`, it throws another exception:

    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, [task hostname]): org.apache.spark.SparkException: File ./carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar exists and does not match contents of http://master:50843/jars/carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar

I checked the assembly jar, and CarbonBlockDistinctValuesCombineRDD is actually present.

Has anyone met the same problem?

Best Regards,
Hexiaoqiao
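The second error typically means an executor already has a file named carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar in its working directory (or reachable through some other mechanism) whose contents differ from the copy the driver is serving over HTTP. A rough way to narrow this down; the host name and remote path below are placeholders, not values from this thread:

    # Compare the jar the driver ships with whatever copy already exists on a worker
    md5sum carbonlib/carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar
    ssh <worker-host> 'md5sum /path/on/worker/carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar'

    # Or let Spark overwrite the mismatched local copy instead of failing the task
    bin/spark-shell --jars carbonlib/carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar \
      --conf spark.files.overwrite=true

This is only a way to investigate the mismatch, not a definitive fix; the cleaner route is to make sure every node ends up with one and the same copy of the assembly jar.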
Hi He Xiaoqiao,
The Quick Start Guide targets Spark in local mode. Your case is a YARN cluster, so please check:
https://github.com/apache/incubator-carbondata/blob/master/docs/installation-guide.md

Regards
Liang
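For readers following along, the "Spark on YARN" section of that guide amounts to shipping the CarbonData assembly jar and carbon.properties to every executor through spark-defaults.conf. The entries below are only a sketch showing the shape of that configuration; <SPARK_HOME> and the jar/archive names are placeholders, and the exact property values should be taken from the guide itself, not from this sketch:

    # <SPARK_HOME>/conf/spark-defaults.conf (illustrative values only -- see the installation guide)
    spark.master                     yarn-client
    spark.yarn.dist.files            <SPARK_HOME>/conf/carbon.properties
    spark.yarn.dist.archives         <SPARK_HOME>/carbonlib/carbondata.tar.gz
    spark.executor.extraClassPath    carbondata.tar.gz/carbonlib/*
    spark.executor.extraJavaOptions  -Dcarbon.properties.filepath=carbon.properties
    spark.driver.extraClassPath      <SPARK_HOME>/carbonlib/*
    spark.driver.extraJavaOptions    -Dcarbon.properties.filepath=<SPARK_HOME>/conf/carbon.properties

If the assembly jar never reaches the executors' classpath, the yarn-client driver can still resolve CarbonData classes locally while executor tasks fail with exactly the ClassNotFoundException shown above.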
Hi Liang Chen,
Thanks for your help. It is true that I installed and configured CarbonData on the "Spark on YARN" cluster following the installation guide (
https://github.com/apache/incubator-carbondata/blob/master/docs/installation-guide.md#installing-and-configuring-carbondata-on-spark-on-yarn-cluster
).

Best Regards,
Hexiaoqiao
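One extra sanity check that can help in yarn-client mode, sketched here with placeholder application id and host names: confirm that the YARN containers of the failed load actually saw the CarbonData jar, and that the path configured for the executors really exists on every NodeManager host.

    # Look through the containers of the failed application for CarbonData-related entries
    yarn logs -applicationId <application_id> | grep -i carbondata

    # Verify the location referenced by spark.executor.extraClassPath on each NodeManager host
    ssh <nodemanager-host> 'ls -l <SPARK_HOME>/carbonlib/'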
> > java:370) > > > > at > > > > org.apache.spark.serializer.JavaDeserializationStream. > > > > readObject(JavaSerializer.scala:76) > > > > > > > > at > > > > org.apache.spark.serializer.JavaSerializerInstance. > > > > deserialize(JavaSerializer.scala:115) > > > > > > > > at > > > > org.apache.spark.scheduler.ShuffleMapTask.runTask( > > > ShuffleMapTask.scala:64) > > > > at > > > > org.apache.spark.scheduler.ShuffleMapTask.runTask( > > > ShuffleMapTask.scala:41) > > > > at org.apache.spark.scheduler.Task.run(Task.scala:89) > > > > at > > > > org.apache.spark.executor.Executor$TaskRunner.run( > Executor.scala:227) > > > > at > > > > java.util.concurrent.ThreadPoolExecutor.runWorker( > > > > ThreadPoolExecutor.java:1145) > > > > > > > > at > > > > java.util.concurrent.ThreadPoolExecutor$Worker.run( > > > > ThreadPoolExecutor.java:615) > > > > > > > > at java.lang.Thread.run(Thread.java:745) > > > > > > > > > > > > > -- > Regards > Liang > |
Hi Xiaoqiao,
Does the problem still exist? Can you try a clean build with the command "mvn clean -DskipTests -Pspark-1.6 package"?
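After relaunching spark-shell with the rebuilt assembly jar, a quick way to confirm the class is actually visible to the executors is a sketch like the one below (sc is the SparkContext the shell provides; the class name is the one from your stack trace):

    // Sketch only: check that the CarbonData RDD class can be loaded on the
    // driver and on an executor.
    val clsName = "org.apache.carbondata.spark.rdd.CarbonBlockDistinctValuesCombineRDD"

    // Driver side: throws ClassNotFoundException if the jar is missing here.
    Class.forName(clsName)

    // Executor side: run a single task and try to load the class there.
    sc.parallelize(Seq(1), 1)
      .map(_ => Class.forName(clsName).getName)
      .collect()
      .foreach(println)

If the driver call succeeds but the executor task fails, the jar is only on the driver classpath, which would match the yarn-client behaviour you are seeing.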
Regards,
Ravindra.


On 16 February 2017 at 08:36, Xiaoqiao He <[hidden email]> wrote:
> [...]

--
Thanks & Regards,
Ravi
|
Hi Ravindra,
Thanks for your suggestions, but I hit another problem when creating a table and loading data.

1. I did compile and build CarbonData by following the README
(https://github.com/apache/incubator-carbondata/blob/master/build/README.md):

> mvn -DskipTests -Pspark-1.6 -Dspark.version=1.6.2 clean package

2. I think the exceptions mentioned above (ClassNotFoundException / 'exists and does not match') are related to the 'spark.executor.extraClassPath' configuration item. When I traced the executor logs, I found that the executor tries to load the class from the path given in spark.executor.extraClassPath and cannot find it locally (that local path is valid only on the driver), so it throws the exception. After I removed this item from the configuration and ran the same command with the --jars parameter, the exception no longer occurred.

3. But when I create a table following the quick start:

> scala> cc.sql("CREATE TABLE IF NOT EXISTS sample (id string, name string,
> city string, age Int) STORED BY 'carbondata'")

the info log shows:

> INFO 20-02 12:00:35,690 - main Query [CREATE TABLE TEST.SAMPLE USING
> CARBONDATA OPTIONS (TABLENAME "TEST.SAMPLE", TABLEPATH
> "/HOME/PATH/HEXIAOQIAO/CARBON.STORE/TEST/SAMPLE") ]

and TABLEPATH does not look like the proper path (I have no idea why it is not an HDFS path). When I then load data as below, another exception is thrown:

> scala> cc.sql("LOAD DATA INPATH
> 'hdfs://hacluster/user/hadoop-data/sample.csv' INTO TABLE sample")

the info log shows:

> INFO 20-02 12:01:27,608 - main HDFS lock
> path:hdfs://hacluster/home/path/hexiaoqiao/carbon.store/test/sample/meta.lock

This lock path is not the expected HDFS path; it looks like [hdfs scheme://authority] plus the local setup path of CarbonData (is storelocation not taking effect?), and then it throws:

> INFO 20-02 12:01:42,668 - Table MetaData Unlocked Successfully after data load
> java.lang.RuntimeException: Table is locked for updation. Please try after some time
>         at scala.sys.package$.error(package.scala:27)
>         at org.apache.spark.sql.execution.command.LoadTable.run(carbonTableSchema.scala:360)
>         at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
>         at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
>         at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
> ......

CarbonData configuration:

carbon.storelocation=hdfs://hacluster/tmp/carbondata/carbon.store
carbon.lock.type=HDFSLOCK

FYI.
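For reference, the quick-start flow I am following is roughly the sketch below. It assumes the CarbonContext(sc, storePath) constructor shown in the quick-start guide and passes the HDFS store location explicitly; this store path argument is exactly the part I am unsure about:

    import org.apache.spark.sql.CarbonContext

    // Sketch only: pass the HDFS store location explicitly so that TABLEPATH
    // and the meta.lock path are resolved under hdfs://hacluster instead of a
    // local directory. Constructor signature taken from the quick-start guide.
    val cc = new CarbonContext(sc, "hdfs://hacluster/tmp/carbondata/carbon.store")

    cc.sql("CREATE TABLE IF NOT EXISTS sample (id string, name string, city string, age Int) STORED BY 'carbondata'")
    cc.sql("LOAD DATA INPATH 'hdfs://hacluster/user/hadoop-data/sample.csv' INTO TABLE sample")

If the store path argument is left out, I would expect the context to fall back to a default location, which might explain the local-looking TABLEPATH above.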
Regards,
Hexiaoqiao


On Sat, Feb 18, 2017 at 3:26 PM, Ravindra Pesala <[hidden email]> wrote:
> [...]
|
Hi,
How did you create the CarbonContext? Can you check whether you have provided the same store path in carbon.properties and in the CarbonContext?
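As an illustration only, keeping the two in sync from spark-shell could look like the sketch below; it assumes the CarbonProperties.getInstance().addProperty API and the two-argument CarbonContext constructor, and reuses the store path from your carbon.properties:

    import org.apache.carbondata.core.util.CarbonProperties
    import org.apache.spark.sql.CarbonContext

    // Sketch only: use one HDFS store path everywhere, matching
    // carbon.storelocation in carbon.properties, so table data and the HDFS
    // lock files end up under the same location.
    val storePath = "hdfs://hacluster/tmp/carbondata/carbon.store"

    CarbonProperties.getInstance().addProperty("carbon.storelocation", storePath)
    val cc = new CarbonContext(sc, storePath)

    // The HDFS lock path logged during LOAD DATA should now start with storePath.
    println(CarbonProperties.getInstance().getProperty("carbon.storelocation"))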
Regards,
Ravindra.


On 20 February 2017 at 12:26, Xiaoqiao He <[hidden email]> wrote:
> [...]
> > > scala:945) > > > > > > > > > > > > > > at > > > > > > > scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader( > > > > > > > ScalaClassLoader.scala:135) > > > > > > > > > > > > > > at org.apache.spark.repl.SparkILoop.org$apache$spark$ > > > > > > > repl$SparkILoop$$process(SparkILoop.scala:945) > > > > > > > > > > > > > > at org.apache.spark.repl.SparkILoop.process(SparkILoop. > > > > > scala:1059) > > > > > > > at org.apache.spark.repl.Main$.main(Main.scala:31) > > > > > > > at org.apache.spark.repl.Main.main(Main.scala) > > > > > > > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native > > > Method) > > > > > > > at > > > > > > > sun.reflect.NativeMethodAccessorImpl.invoke( > > > > > > NativeMethodAccessorImpl.java: > > > > > > > 57) > > > > > > > > > > > > > > at > > > > > > > sun.reflect.DelegatingMethodAccessorImpl.invoke( > > > > > > > DelegatingMethodAccessorImpl.java:43) > > > > > > > > > > > > > > at java.lang.reflect.Method.invoke(Method.java:606) > > > > > > > at > > > > > > > org.apache.spark.deploy.SparkSubmit$.org$apache$spark$ > > > > > > > deploy$SparkSubmit$$runMain(SparkSubmit.scala:735) > > > > > > > > > > > > > > at > > > > > > > org.apache.spark.deploy.SparkSubmit$.doRunMain$1( > > > > > SparkSubmit.scala:181) > > > > > > > at org.apache.spark.deploy.SparkSubmit$.submit( > > > > > > SparkSubmit.scala:206) > > > > > > > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit. > > > > > scala:121) > > > > > > > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit. > > > scala) > > > > > > > Caused by: java.lang.ClassNotFoundException: > > > > > > > org.apache.carbondata.spark.rdd.CarbonBlockDistinctValuesCombi > > > neRDD > > > > > > > at > > > > > > > org.apache.spark.repl.ExecutorClassLoader.findClass( > > > > > > > ExecutorClassLoader.scala:84) > > > > > > > > > > > > > > at java.lang.ClassLoader.loadClass(ClassLoader.java:425) > > > > > > > at java.lang.ClassLoader.loadClass(ClassLoader.java:358) > > > > > > > at java.lang.Class.forName0(Native Method) > > > > > > > at java.lang.Class.forName(Class.java:274) > > > > > > > at > > > > > > > org.apache.spark.serializer.JavaDeserializationStream$$ > > > > > > > anon$1.resolveClass(JavaSerializer.scala:68) > > > > > > > > > > > > > > at > > > > > > > java.io.ObjectInputStream.readNonProxyDesc( > > > > > ObjectInputStream.java:1612) > > > > > > > at > > > > > > > java.io.ObjectInputStream.readClassDesc( > > > ObjectInputStream.java:1517) > > > > > > > at > > > > > > > java.io.ObjectInputStream.readOrdinaryObject( > > > > > > ObjectInputStream.java:1771) > > > > > > > at java.io.ObjectInputStream. > readObject0(ObjectInputStream. > > > > > > java:1350) > > > > > > > at > > > > > > > java.io.ObjectInputStream.defaultReadFields( > > > > > ObjectInputStream.java:1990) > > > > > > > at > > > > > > > java.io.ObjectInputStream.readSerialData( > > > > ObjectInputStream.java:1915) > > > > > > > at > > > > > > > java.io.ObjectInputStream.readOrdinaryObject( > > > > > > ObjectInputStream.java:1798) > > > > > > > at java.io.ObjectInputStream. > readObject0(ObjectInputStream. > > > > > > java:1350) > > > > > > > at java.io.ObjectInputStream. > readObject(ObjectInputStream. > > > > > java:370) > > > > > > > at > > > > > > > org.apache.spark.serializer.JavaDeserializationStream. > > > > > > > readObject(JavaSerializer.scala:76) > > > > > > > > > > > > > > at > > > > > > > org.apache.spark.serializer.JavaSerializerInstance. 
> > > > > > > deserialize(JavaSerializer.scala:115) > > > > > > > > > > > > > > at > > > > > > > org.apache.spark.scheduler.ShuffleMapTask.runTask( > > > > > > ShuffleMapTask.scala:64) > > > > > > > at > > > > > > > org.apache.spark.scheduler.ShuffleMapTask.runTask( > > > > > > ShuffleMapTask.scala:41) > > > > > > > at org.apache.spark.scheduler.Task.run(Task.scala:89) > > > > > > > at > > > > > > > org.apache.spark.executor.Executor$TaskRunner.run( > > > > Executor.scala:227) > > > > > > > at > > > > > > > java.util.concurrent.ThreadPoolExecutor.runWorker( > > > > > > > ThreadPoolExecutor.java:1145) > > > > > > > > > > > > > > at > > > > > > > java.util.concurrent.ThreadPoolExecutor$Worker.run( > > > > > > > ThreadPoolExecutor.java:615) > > > > > > > > > > > > > > at java.lang.Thread.run(Thread.java:745) > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > -- > > > > Regards > > > > Liang > > > > > > > > > > > > > > > -- > > Thanks & Regards, > > Ravi > > > -- Thanks & Regards, Ravi |
Hi,
Please create the carbon context as follows:

    val cc = new CarbonContext(sc, storeLocation)

Here storeLocation is hdfs://hacluster/tmp/carbondata/carbon.store in your case.
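For reference, a minimal spark-shell session that applies this might look as below. This is only a sketch: the assembly jar name and the store location are the ones mentioned in this thread, so adjust them to your own environment.

    ./bin/spark-shell --jars carbonlib/carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar

    scala> import org.apache.spark.sql.CarbonContext
    scala> val storeLocation = "hdfs://hacluster/tmp/carbondata/carbon.store"
    scala> val cc = new CarbonContext(sc, storeLocation)   // sc is the SparkContext created by spark-shell
    scala> cc.sql("SHOW TABLES").show()                     // quick check that the context is usable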
Regards,
Ravindra

On 21 February 2017 at 08:30, Ravindra Pesala <[hidden email]> wrote:

> Hi,
>
> How did you create CarbonContext?
> Can you check whether you have provided the same store path in
> carbon.properties and the CarbonContext.
>
> Regards,
> Ravindra.
>
> On 20 February 2017 at 12:26, Xiaoqiao He <[hidden email]> wrote:
>
>> Hi Ravindra,
>>
>> Thanks for your suggestions. But another problem came up when I create a
>> table and load data.
>>
>> 1. I followed the README to compile and build CarbonData, via
>> https://github.com/apache/incubator-carbondata/blob/master/build/README.md :
>>
>> > mvn -DskipTests -Pspark-1.6 -Dspark.version=1.6.2 clean package
>>
>> 2. I think the exceptions mentioned above (ClassNotFoundException / 'exists
>> and does not match') are related to the configuration item
>> 'spark.executor.extraClassPath'. When I trace the executor logs, I find the
>> executor tries to load the class from the same path as the
>> spark.executor.extraClassPath config and cannot find it locally (this local
>> path is valid only for the driver), and throws the exception. When I remove
>> this item from the configuration and run the same command with the --jars
>> parameter, the exception is not thrown again.
>>
>> 3. But when I create a table following the quick start as below:
>>
>> > scala> cc.sql("CREATE TABLE IF NOT EXISTS sample (id string, name string,
>> > city string, age Int) STORED BY 'carbondata'")
>>
>> there are some info logs such as:
>>
>> > INFO 20-02 12:00:35,690 - main Query [CREATE TABLE TEST.SAMPLE USING
>> > CARBONDATA OPTIONS (TABLENAME "TEST.SAMPLE", TABLEPATH
>> > "/HOME/PATH/HEXIAOQIAO/CARBON.STORE/TEST/SAMPLE") ]
>>
>> and *TABLEPATH does not look like the proper path (I have no idea why this
>> path is not an HDFS path)*, and then I load data as below but another
>> exception is thrown:
>>
>> > scala> cc.sql("LOAD DATA INPATH
>> > 'hdfs://hacluster/user/hadoop-data/sample.csv' INTO TABLE sample")
>>
>> there are some info logs such as:
>>
>> > INFO 20-02 12:01:27,608 - main HDFS lock
>> > path:hdfs://hacluster/home/path/hexiaoqiao/carbon.store/test/sample/meta.lock
>>
>> *this lock path is not the expected HDFS path; it looks like [hdfs
>> scheme://authority] + the local setup path of CarbonData. (is storelocation
>> not active?)*
>> and it throws the exception:
>>
>> > INFO 20-02 12:01:42,668 - Table MetaData Unlocked Successfully after data load
>> > java.lang.RuntimeException: Table is locked for updation. Please try after some time
>> >     at scala.sys.package$.error(package.scala:27)
>> >     at org.apache.spark.sql.execution.command.LoadTable.run(carbonTableSchema.scala:360)
>> >     at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
>> >     at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
>> >     at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
>> ......
>>
>> CarbonData Configuration:
>> carbon.storelocation=hdfs://hacluster/tmp/carbondata/carbon.store
>> carbon.lock.type=HDFSLOCK
>> FYI.
>>
>> Regards,
>> Hexiaoqiao
>>
>> On Sat, Feb 18, 2017 at 3:26 PM, Ravindra Pesala <[hidden email]> wrote:
>>
>> > Hi Xiaoqiao,
>> >
>> > Does the problem still exist?
>> > Can you try a clean build with the "mvn clean -DskipTests -Pspark-1.6
>> > package" command.
>> >
>> > Regards,
>> > Ravindra.
>> >
>> > On 16 February 2017 at 08:36, Xiaoqiao He <[hidden email]> wrote:
>> >
>> > > hi Liang Chen,
>> > >
>> > > Thanks for your help. It is true that I install and configure carbondata
>> > > on the "spark on yarn" cluster following the installation guide (
>> > > https://github.com/apache/incubator-carbondata/blob/master/docs/installation-guide.md#installing-and-configuring-carbondata-on-spark-on-yarn-cluster
>> > > ).
>> > >
>> > > Best Regards,
>> > > Hexiaoqiao
>> > >
>> > > On Thu, Feb 16, 2017 at 7:47 AM, Liang Chen <[hidden email]> wrote:
>> > >
>> > > > Hi He xiaoqiao
>> > > >
>> > > > Quick start is local-mode Spark.
>> > > > Your case is a yarn cluster, please check:
>> > > > https://github.com/apache/incubator-carbondata/blob/master/docs/installation-guide.md
>> > > >
>> > > > Regards
>> > > > Liang
>> > > >
>> > > > 2017-02-15 3:29 GMT-08:00 Xiaoqiao He <[hidden email]>:
>> > > >
>> > > > > hi Manish Gupta,
>> > > > >
>> > > > > Thanks for your focus, actually I tried to load data following
>> > > > > https://github.com/apache/incubator-carbondata/blob/master/docs/quick-start-guide.md
>> > > > > for deploying carbondata-1.0.0.
>> > > > >
>> > > > > 1. when I execute carbondata by `bin/spark-shell`, it throws as above.
>> > > > > 2. when I execute carbondata by `bin/spark-shell --jars
>> > > > > carbonlib/carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar`, it
>> > > > > throws another exception as below:
>> > > > >
>> > > > > > org.apache.spark.SparkException: Job aborted due to stage failure: Task 0
>> > > > > > in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage
>> > > > > > 0.0 (TID 3, [task hostname]): org.apache.spark.SparkException: File
>> > > > > > ./carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar exists and does
>> > > > > > not match contents of
>> > > > > > http://master:50843/jars/carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar
>> > > > >
>> > > > > I checked the assembly jar and CarbonBlockDistinctValuesCombineRDD is
>> > > > > present actually.
>> > > > >
>> > > > > anyone who meets the same problem?
>> > > > >
>> > > > > Best Regards,
>> > > > > Hexiaoqiao
>> > > > >
>> > > > > On Wed, Feb 15, 2017 at 12:56 AM, manish gupta <[hidden email]> wrote:
>> > > > >
>> > > > > > Hi,
>> > > > > >
>> > > > > > I think the carbon jar is compiled properly. Can you use any decompiler
>> > > > > > and decompile carbondata-spark-common-1.1.0-incubating-SNAPSHOT.jar
>> > > > > > present in the spark-common module target folder and check whether the
>> > > > > > required class file
>> > > > > > org.apache.carbondata.spark.rdd.CarbonBlockDistinctValuesCombineRDD is
>> > > > > > present or not.
>> > > > > >
>> > > > > > If you are using only the assembly jar then decompile and check in the
>> > > > > > assembly jar.
>> > > > > >
>> > > > > > Regards
>> > > > > > Manish Gupta
Hi All,
Thanks for your help. This problem has been solved and CarbonData now runs as expected with 'Spark on YARN'. I think there are some misleading points when following the [Installation Guide <https://github.com/apache/incubator-carbondata/blob/master/docs/installation-guide.md>] and [Quick Start <https://github.com/apache/incubator-carbondata/blob/master/docs/quick-start-guide.md>] guides to run CarbonData with Spark on YARN:

1. Suggest launching CarbonData as `./bin/spark-shell --jars <carbondata assembly jar path>` and not configuring the parameter `spark.executor.extraClassPath`, since the executor tries to load the CarbonData classes from that path, but the path is valid only on the driver.

2. Make sure to create the CarbonContext instance as `val cc = new CarbonContext(sc, storePath)`; if it is created without the store path, e.g. `val cc = new CarbonContext(sc)`, it will use an unexpected path as the data directory and throw exceptions such as `java.lang.RuntimeException: Table is locked for updation` (after the `Table MetaData Unlocked Successfully` log), etc.

3. Please backport PR#608 <https://github.com/apache/incubator-carbondata/pull/608> if HDFS is a federation architecture using the viewfs scheme, because the current code only checks `hdfs://` as a valid HDFS scheme and ignores `viewfs://`.

Regards,
Hexiaoqiao
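For reference, a minimal session that follows points 1 and 2 above might look as below. This is only a sketch: the assembly jar, store location and CSV path are the ones used in this thread (yarn-client mode, as described above), so replace them with your own values.

    ./bin/spark-shell --master yarn-client --jars carbonlib/carbondata_2.10-1.0.0-incubating-shade-hadoop2.7.1.jar

    scala> import org.apache.spark.sql.CarbonContext
    scala> val storePath = "hdfs://hacluster/tmp/carbondata/carbon.store"
    scala> val cc = new CarbonContext(sc, storePath)   // pass the store path explicitly (point 2)
    scala> cc.sql("CREATE TABLE IF NOT EXISTS sample (id string, name string, city string, age Int) STORED BY 'carbondata'")
    scala> cc.sql("LOAD DATA INPATH 'hdfs://hacluster/user/hadoop-data/sample.csv' INTO TABLE sample")
    scala> cc.sql("SELECT * FROM sample").show()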