dev

spark2.1 + carbondata1.1.1

When the SQL "delete from e_carbon.offer_inst_c where offer_inst_id in (select offer_inst_id from cache_offer_inst_U)" is executed, the following error occurs:

"Delete data operation is failed: [[java.io.IOException: java.lang.Exception: Invalid tuple id 0/927.37/0-11_batchno0-0-1514560063689"

After that, I found that records had still been deleted: not only the rows matching offer_inst_id values from cache_offer_inst_U, but the whole data block "0-11_batchno0-0-1514560063689". For example, there are 100,000 records in the table cache_offer_inst_U, but it actually deleted 2,000,000 records.

Detailed log:

java.lang.Exception: ===outer exception:[[[java.lang.Exception: step2.6 1 times failed, error executing sql:[delete from e_carbon.offer_inst_c where offer_inst_id in (select offer_inst_id from cache_offer_inst_U)] causeerror:[[[NULL]]] error:[[[java.lang.RuntimeException: Delete data operation is failed: [[java.io.IOException: java.lang.Exception: Invalid tuple id 0/927.37/0-11_batchno0-0-1514560063689
    at org.apache.carbondata.hadoop.CarbonInputFormat.getSplits(CarbonInputFormat.java:351)
    at org.apache.carbondata.hadoop.CarbonInputFormat.getSplits(CarbonInputFormat.java:269)
    at org.apache.carbondata.spark.rdd.CarbonScanRDD.getPartitions(CarbonScanRDD.scala:81)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:91)
    at org.apache.spark.rdd.ShuffledRDD.getDependencies(ShuffledRDD.scala:91)
    at org.apache.spark.rdd.RDD$$anonfun$dependencies$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$dependencies$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.dependencies(RDD.scala:237)
    at org.apache.spark.scheduler.DAGScheduler.getShuffleDependencies(DAGScheduler.scala:424)
    at org.apache.spark.scheduler.DAGScheduler.getOrCreateParentStages(DAGScheduler.scala:373)
    at org.apache.spark.scheduler.DAGScheduler.createResultStage(DAGScheduler.scala:360)
    at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:838)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1613)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1925)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1938)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1951)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1965)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:935)
    at org.apache.spark.sql.execution.command.deleteExecution$.deleteDeltaExecution(IUDCommands.scala:606)
    at org.apache.spark.sql.execution.command.ProjectForDeleteCommand.run(IUDCommands.scala:98)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
    at org.apache.spark.sql.Dataset.<init>(Dataset.scala:185)
    at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
    at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:592)
    at cn.ffcs.carbon.clienttouch.ProcessOracle6$.exeSqlWithRetry(ProcessOracle6.scala:564)
    at cn.ffcs.carbon.clienttouch.ProcessOracle6$$anonfun$triggeredAppendBatch$2.apply(ProcessOracle6.scala:281)
    at cn.ffcs.carbon.clienttouch.ProcessOracle6$$anonfun$triggeredAppendBatch$2.apply(ProcessOracle6.scala:277)
    at scala.collection.IndexedSeqO
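For context, the statement is issued from a Scala job through SparkSession.sql. The minimal sketch below shows roughly how it is driven; the CarbonSession setup, master, store path, and object name are simplified placeholders rather than the actual ProcessOracle6 code, and it assumes both tables already exist with an offer_inst_id column.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.CarbonSession._

object DeleteRepro {
  def main(args: Array[String]): Unit = {
    // Build a CarbonSession; local master and store path are placeholders for the sketch only.
    val spark = SparkSession
      .builder()
      .master("local[*]")
      .appName("carbon-delete-repro")
      .getOrCreateCarbonSession("hdfs:///tmp/carbonstore")

    // cache_offer_inst_U supplies the keys; e_carbon.offer_inst_c is the Carbon table deleted from.
    spark.sql(
      """delete from e_carbon.offer_inst_c
        |where offer_inst_id in (select offer_inst_id from cache_offer_inst_U)""".stripMargin)

    spark.stop()
  }
}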
yixu2001
Can you please re-verify this issue in the latest code base?
On Tue, Jan 2, 2018 at 2:50 PM, yixu2001 <[hidden email]> wrote:
> dev
>
> spark2.1 + carbondata1.1.1
>
> When the SQL "delete from e_carbon.offer_inst_c where offer_inst_id in
> (select offer_inst_id from cache_offer_inst_U)" is executed, the following
> error occurs:
> "Delete data operation is failed: [[java.io.IOException:
> java.lang.Exception: Invalid tuple id 0/927.37/0-11_batchno0-0-1514560063689"

--
Thanks
Sounak