[ https://issues.apache.org/jira/browse/CARBONDATA-1619?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

wyp updated CARBONDATA-1619:
----------------------------

Description:

Loading data into a CarbonData table with {{overwrite=true}} several times in a row causes a {{NullPointerException}}. The following spark-shell session reproduces it:

{code}
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.1.0
      /_/

Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_77)
Type in expressions to have them evaluated.
Type :help for more information.

scala> import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.SparkSession

scala> import org.apache.spark.sql.CarbonSession._
import org.apache.spark.sql.CarbonSession._

scala> val carbon = SparkSession.builder().config(sc.getConf).getOrCreateCarbonSession("hdfs://mycluster/user/wyp/carb")
17/10/26 12:58:25 WARN spark.SparkContext: Using an existing SparkContext; some configuration may not take effect.
17/10/26 12:58:25 WARN util.CarbonProperties: main The custom block distribution value "null" is invalid. Using the default value "false
17/10/26 12:58:25 WARN util.CarbonProperties: main The enable vector reader value "null" is invalid. Using the default value "true
17/10/26 12:58:25 WARN util.CarbonProperties: main The value "LOCALLOCK" configured for key carbon.lock.type is invalid for current file system. Use the default value HDFSLOCK instead.
17/10/26 12:58:43 WARN metastore.ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
carbon: org.apache.spark.sql.SparkSession = org.apache.spark.sql.CarbonSession@718b9d56

scala> carbon.sql("CREATE TABLE temp.my_table(id bigint) STORED BY 'carbondata'")
17/10/26 12:59:03 AUDIT command.CreateTable: [l-sparkcluster1.test.com][wyp][Thread-1]Creating Table with Database name [temp] and Table name [my_table]
17/10/26 12:59:03 WARN hive.HiveExternalCatalog: Couldn't find corresponding Hive SerDe for data source provider org.apache.spark.sql.CarbonSource. Persisting data source table `temp`.`my_table` into Hive metastore in Spark SQL specific format, which is NOT compatible with Hive.
17/10/26 12:59:03 AUDIT command.CreateTable: [l-sparkcluster1.test.com][wyp][Thread-1]Table created with Database name [temp] and Table name [my_table]
res0: org.apache.spark.sql.DataFrame = []

scala> carbon.sql("insert overwrite table temp.my_table select id from co.order_common_p where dt = '2010-10'")
17/10/26 12:59:23 AUDIT rdd.CarbonDataRDDFactory$: [l-sparkcluster1.test.com][wyp][Thread-1]Data load request has been received for table temp.my_table
17/10/26 12:59:23 WARN util.CarbonDataProcessorUtil: main sort scope is set to LOCAL_SORT
17/10/26 12:59:26 AUDIT rdd.CarbonDataRDDFactory$: [l-sparkcluster1.test.com][wyp][Thread-1]Data load is successful for temp.my_table
res1: org.apache.spark.sql.DataFrame = []

scala> carbon.sql("insert overwrite table temp.my_table select id from co.order_common_p where dt = '2010-10'")
17/10/26 12:59:33 AUDIT rdd.CarbonDataRDDFactory$: [l-sparkcluster1.test.com][wyp][Thread-1]Data load request has been received for table temp.my_table
17/10/26 12:59:33 WARN util.CarbonDataProcessorUtil: main sort scope is set to LOCAL_SORT
17/10/26 12:59:52 AUDIT rdd.CarbonDataRDDFactory$: [l-sparkcluster1.test.com][wyp][Thread-1]Data load is successful for temp.my_table
res2: org.apache.spark.sql.DataFrame = []

scala> carbon.sql("insert overwrite table temp.my_table select id from co.order_common_p where dt = '2012-10'")
17/10/26 13:00:05 AUDIT rdd.CarbonDataRDDFactory$: [l-sparkcluster1.test.com][wyp][Thread-1]Data load request has been received for table temp.my_table
17/10/26 13:00:05 WARN util.CarbonDataProcessorUtil: main sort scope is set to LOCAL_SORT
17/10/26 13:00:08 ERROR filesystem.AbstractDFSCarbonFile: main Exception occurred:File does not exist: hdfs://mycluster/user/wyp/carb/temp/my_table/Fact/Part0/Segment_0
17/10/26 13:00:09 ERROR command.LoadTable: main
java.lang.NullPointerException
  at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.isDirectory(AbstractDFSCarbonFile.java:88)
  at org.apache.carbondata.core.util.CarbonUtil.deleteRecursive(CarbonUtil.java:364)
  at org.apache.carbondata.core.util.CarbonUtil.access$100(CarbonUtil.java:93)
  at org.apache.carbondata.core.util.CarbonUtil$2.run(CarbonUtil.java:326)
  at org.apache.carbondata.core.util.CarbonUtil$2.run(CarbonUtil.java:322)
  at java.security.AccessController.doPrivileged(Native Method)
  at javax.security.auth.Subject.doAs(Subject.java:422)
  at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
  at org.apache.carbondata.core.util.CarbonUtil.deleteFoldersAndFiles(CarbonUtil.java:322)
  at org.apache.carbondata.spark.load.CarbonLoaderUtil.recordLoadMetadata(CarbonLoaderUtil.java:333)
  at org.apache.carbondata.spark.rdd.CarbonDataRDDFactory$.updateStatus$1(CarbonDataRDDFactory.scala:595)
  at org.apache.carbondata.spark.rdd.CarbonDataRDDFactory$.loadCarbonData(CarbonDataRDDFactory.scala:1107)
  at org.apache.spark.sql.execution.command.LoadTable.processData(carbonTableSchema.scala:1046)
  at org.apache.spark.sql.execution.command.LoadTable.run(carbonTableSchema.scala:754)
  at org.apache.spark.sql.execution.command.LoadTableByInsert.processData(carbonTableSchema.scala:651)
  at org.apache.spark.sql.execution.command.LoadTableByInsert.run(carbonTableSchema.scala:637)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
  at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
  at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:87)
  at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:87)
  at org.apache.spark.sql.Dataset.<init>(Dataset.scala:185)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:592)
  at $line20.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:31)
  at $line20.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:36)
  at $line20.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:38)
  at $line20.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:40)
  at $line20.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:42)
  at $line20.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:44)
  at $line20.$read$$iw$$iw$$iw$$iw.<init>(<console>:46)
  at $line20.$read$$iw$$iw$$iw.<init>(<console>:48)
  at $line20.$read$$iw$$iw.<init>(<console>:50)
  at $line20.$read$$iw.<init>(<console>:52)
  at $line20.$read.<init>(<console>:54)
  at $line20.$read$.<init>(<console>:58)
  at $line20.$read$.<clinit>(<console>)
  at $line20.$eval$.$print$lzycompute(<console>:7)
  at $line20.$eval$.$print(<console>:6)
  at $line20.$eval.$print(<console>)
  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:498)
  at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786)
  at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047)
  at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638)
  at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637)
  at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
  at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
  at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637)
  at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569)
  at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565)
  at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:807)
  at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:681)
  at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:395)
  at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:415)
  at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply$mcZ$sp(ILoop.scala:923)
  at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
  at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
  at scala.reflect.internal.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:97)
  at scala.tools.nsc.interpreter.ILoop.process(ILoop.scala:909)
  at org.apache.spark.repl.Main$.doMain(Main.scala:68)
  at org.apache.spark.repl.Main$.main(Main.scala:51)
  at org.apache.spark.repl.Main.main(Main.scala)
  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:498)
  at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:738)
  at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)
  at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)
  at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
  at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
17/10/26 13:00:09 AUDIT command.LoadTable: [l-sparkcluster1.test.com][wyp][Thread-1]Dataload failure for temp.my_table. Please check the logs
java.lang.NullPointerException
  at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.isDirectory(AbstractDFSCarbonFile.java:88)
  at org.apache.carbondata.core.util.CarbonUtil.deleteRecursive(CarbonUtil.java:364)
  at org.apache.carbondata.core.util.CarbonUtil.access$100(CarbonUtil.java:93)
  at org.apache.carbondata.core.util.CarbonUtil$2.run(CarbonUtil.java:326)
  at org.apache.carbondata.core.util.CarbonUtil$2.run(CarbonUtil.java:322)
  at java.security.AccessController.doPrivileged(Native Method)
  at javax.security.auth.Subject.doAs(Subject.java:422)
  at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
  at org.apache.carbondata.core.util.CarbonUtil.deleteFoldersAndFiles(CarbonUtil.java:322)
  at org.apache.carbondata.spark.load.CarbonLoaderUtil.recordLoadMetadata(CarbonLoaderUtil.java:333)
  at org.apache.carbondata.spark.rdd.CarbonDataRDDFactory$.updateStatus$1(CarbonDataRDDFactory.scala:595)
  at org.apache.carbondata.spark.rdd.CarbonDataRDDFactory$.loadCarbonData(CarbonDataRDDFactory.scala:1107)
  at org.apache.spark.sql.execution.command.LoadTable.processData(carbonTableSchema.scala:1046)
  at org.apache.spark.sql.execution.command.LoadTable.run(carbonTableSchema.scala:754)
  at org.apache.spark.sql.execution.command.LoadTableByInsert.processData(carbonTableSchema.scala:651)
  at org.apache.spark.sql.execution.command.LoadTableByInsert.run(carbonTableSchema.scala:637)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
  at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
  at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:87)
  at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:87)
  at org.apache.spark.sql.Dataset.<init>(Dataset.scala:185)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:592)
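For quick reproduction, the session above boils down to four statements. This condensed version assumes a spark-shell, where {{sc}} is predefined; {{co.order_common_p}} is simply the source table from the log, and any table with a bigint-compatible column should work in its place:

{code}
// Condensed from the session above (spark-shell; `sc` is in scope there).
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.CarbonSession._

val carbon = SparkSession.builder().config(sc.getConf)
  .getOrCreateCarbonSession("hdfs://mycluster/user/wyp/carb")

carbon.sql("CREATE TABLE temp.my_table(id bigint) STORED BY 'carbondata'")

// The first two overwrites succeed; the third one fails with the NPE.
carbon.sql("insert overwrite table temp.my_table select id from co.order_common_p where dt = '2010-10'")
carbon.sql("insert overwrite table temp.my_table select id from co.order_common_p where dt = '2010-10'")
carbon.sql("insert overwrite table temp.my_table select id from co.order_common_p where dt = '2012-10'")
{code}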
  ... 50 elided

scala>
{code}

As the session shows, if you run an {{insert overwrite table xxxx}} statement three or more times, you get a {{java.lang.NullPointerException}}.

> Loading data to a carbondata table with overwrite=true many times will cause NullPointerException
> --------------------------------------------------------------------------------------------------
>
>                 Key: CARBONDATA-1619
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-1619
>             Project: CarbonData
>          Issue Type: Bug
>          Components: sql
>    Affects Versions: 1.2.0
>            Reporter: wyp
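The 13:00:08 ERROR line ({{File does not exist: ...Segment_0}}) followed by the NPE in {{AbstractDFSCarbonFile.isDirectory}} suggests that the cleanup in {{CarbonUtil.deleteRecursive}} asks whether a segment folder is a directory after an earlier overwrite has already removed it, and ends up dereferencing a file status that is null for the missing path. Below is a minimal sketch of the kind of guard that would avoid this, written against the plain Hadoop FileSystem API; the helper is hypothetical, not the actual CarbonData code:

{code}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Hypothetical guard, not the actual CarbonData fix: probe for existence
// before fetching the file status, so a segment folder that a previous
// overwrite already deleted is treated as "nothing to delete" instead of
// triggering a NullPointerException.
def isExistingDirectory(pathStr: String, conf: Configuration): Boolean = {
  val path = new Path(pathStr)
  val fs: FileSystem = path.getFileSystem(conf)
  fs.exists(path) && fs.getFileStatus(path).isDirectory
}
{code}

Skipping the missing folder this way (or catching {{FileNotFoundException}} around the status lookup) would let the third overwrite proceed.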
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)