dev
Repeat the following statement so many times on an table with 400000000 records. "update e_carbon.prod_inst_his_c A set (a.ETL_date,)=(select b.etl_dateID from cache_prod_inst_his_u b where a.his_id=b.his_id)" After that, using statement "select count(*) from e_carbon.prod_inst_his_c" on the table. And found that the query speed slowed down, while the number of tasks increased. Is the query slowing down due to the fragmentations produced by the update operation? Or is there any other reasons? I have tried to use statement "ALTER table e_carbon.prod_inst_his_c COMPACT 'MAJOR'" to compact the table, But after that, the query statement "select count(*) from e_carbon.prod_inst_his_c" failed with following infomation" [Stage 14:=============> (70 + 4) / 281]17/10/18 17:46:46 WARN scheduler.TaskSetManager: Lost task 136.0 in stage 14.0 (TID 908, HDD015, executor 57): org.apache.carbondata.core.datastore.exception.IndexBuilderException: Block B-tree loading failed at org.apache.carbondata.core.datastore.BlockIndexStore.fillLoadedBlocks(BlockIndexStore.java:264) at org.apache.carbondata.core.datastore.BlockIndexStore.getAll(BlockIndexStore.java:189) at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:131) at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:186) at org.apache.carbondata.core.scan.executor.impl.VectorDetailQueryExecutor.execute(VectorDetailQueryExecutor.java:36) at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.initialize(VectorizedCarbonRecordReader.java:112) at org.apache.carbondata.spark.rdd.CarbonScanRDD.compute(CarbonScanRDD.scala:204) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) at org.apache.spark.scheduler.Task.run(Task.scala:99) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: java.util.concurrent.ExecutionException: java.io.EOFException: Cannot seek to negative offset at java.util.concurrent.FutureTask.report(FutureTask.java:122) at java.util.concurrent.FutureTask.get(FutureTask.java:192) at org.apache.carbondata.core.datastore.BlockIndexStore.fillLoadedBlocks(BlockIndexStore.java:254) ... 
  ... 21 more
Caused by: java.io.EOFException: Cannot seek to negative offset
  at org.apache.hadoop.hdfs.DFSInputStream.seek(DFSInputStream.java:1527)
  at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:62)
  at org.apache.carbondata.core.datastore.impl.DFSFileHolderImpl.readLong(DFSFileHolderImpl.java:129)
  at org.apache.carbondata.core.util.CarbonUtil.calculateMetaSize(CarbonUtil.java:943)
  at org.apache.carbondata.core.datastore.AbstractBlockIndexStoreCache.checkAndLoadTableBlocks(AbstractBlockIndexStoreCache.java:93)
  at org.apache.carbondata.core.datastore.BlockIndexStore.loadBlock(BlockIndexStore.java:304)
  at org.apache.carbondata.core.datastore.BlockIndexStore.get(BlockIndexStore.java:109)
  at org.apache.carbondata.core.datastore.BlockIndexStore$BlockLoaderThread.call(BlockIndexStore.java:294)
  at org.apache.carbondata.core.datastore.BlockIndexStore$BlockLoaderThread.call(BlockIndexStore.java:284)
  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
  ... 3 more

[Stage 14:===============> (78 + 4) / 281]17/10/18 17:46:47 ERROR scheduler.TaskSetManager: Task 136 in stage 14.0 failed 10 times; aborting job
17/10/18 17:46:48 WARN scheduler.TaskSetManager: Lost task 266.8 in stage 14.0 (TID 935, HDD015, executor 57): TaskKilled (killed intentionally)
17/10/18 17:46:48 WARN scheduler.TaskSetManager: Lost task 39.0 in stage 14.0 (TID 929, HDD015, executor 57): TaskKilled (killed intentionally)
17/10/18 17:46:48 WARN scheduler.TaskSetManager: Lost task 38.0 in stage 14.0 (TID 926, HDD015, executor 57): TaskKilled (killed intentionally)
17/10/18 17:46:48 WARN spark.ExecutorAllocationManager: No stages are running, but numRunningTasks != 0
17/10/18 17:46:48 WARN scheduler.TaskSetManager: Lost task 40.0 in stage 14.0 (TID 933, HDD015, executor 57): TaskKilled (killed intentionally)
org.apache.spark.SparkException: Job aborted due to stage failure: Task 136 in stage 14.0 failed 10 times, most recent failure: Lost task 136.9 in stage 14.0 (TID 934, HDD015, executor 57): org.apache.carbondata.core.datastore.exception.IndexBuilderException: Block B-tree loading failed
  at org.apache.carbondata.core.datastore.BlockIndexStore.fillLoadedBlocks(BlockIndexStore.java:264)
  at org.apache.carbondata.core.datastore.BlockIndexStore.getAll(BlockIndexStore.java:189)
  at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:131)
  at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:186)
  at org.apache.carbondata.core.scan.executor.impl.VectorDetailQueryExecutor.execute(VectorDetailQueryExecutor.java:36)
  at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.initialize(VectorizedCarbonRecordReader.java:112)
  at org.apache.carbondata.spark.rdd.CarbonScanRDD.compute(CarbonScanRDD.scala:204)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
  at org.apache.spark.scheduler.Task.run(Task.scala:99)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
  at java.lang.Thread.run(Thread.java:745)
Caused by: java.util.concurrent.ExecutionException: java.io.EOFException: Cannot seek to negative offset
  at java.util.concurrent.FutureTask.report(FutureTask.java:122)
  at java.util.concurrent.FutureTask.get(FutureTask.java:192)
  at org.apache.carbondata.core.datastore.BlockIndexStore.fillLoadedBlocks(BlockIndexStore.java:254)
  ... 21 more
Caused by: java.io.EOFException: Cannot seek to negative offset
  at org.apache.hadoop.hdfs.DFSInputStream.seek(DFSInputStream.java:1527)
  at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:62)
  at org.apache.carbondata.core.datastore.impl.DFSFileHolderImpl.readLong(DFSFileHolderImpl.java:129)
  at org.apache.carbondata.core.util.CarbonUtil.calculateMetaSize(CarbonUtil.java:943)
  at org.apache.carbondata.core.datastore.AbstractBlockIndexStoreCache.checkAndLoadTableBlocks(AbstractBlockIndexStoreCache.java:93)
  at org.apache.carbondata.core.datastore.BlockIndexStore.loadBlock(BlockIndexStore.java:304)
  at org.apache.carbondata.core.datastore.BlockIndexStore.get(BlockIndexStore.java:109)
  at org.apache.carbondata.core.datastore.BlockIndexStore$BlockLoaderThread.call(BlockIndexStore.java:294)
  at org.apache.carbondata.core.datastore.BlockIndexStore$BlockLoaderThread.call(BlockIndexStore.java:284)
  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
  ... 3 more

Driver stacktrace:
  at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
  at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
  at scala.Option.foreach(Option.scala:257)
  at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
  at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
  at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:1925)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:1938)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:1951)
  at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:333)
  at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
  at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2386)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
  at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2788)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2385)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2392)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2128)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2127)
  at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2818)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:2127)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2342)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:248)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:638)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:597)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:606)
  ... 50 elided
Caused by: org.apache.carbondata.core.datastore.exception.IndexBuilderException: Block B-tree loading failed
  at org.apache.carbondata.core.datastore.BlockIndexStore.fillLoadedBlocks(BlockIndexStore.java:264)
  at org.apache.carbondata.core.datastore.BlockIndexStore.getAll(BlockIndexStore.java:189)
  at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:131)
  at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:186)
  at org.apache.carbondata.core.scan.executor.impl.VectorDetailQueryExecutor.execute(VectorDetailQueryExecutor.java:36)
  at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.initialize(VectorizedCarbonRecordReader.java:112)
  at org.apache.carbondata.spark.rdd.CarbonScanRDD.compute(CarbonScanRDD.scala:204)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
  at org.apache.spark.scheduler.Task.run(Task.scala:99)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
  at java.lang.Thread.run(Thread.java:745)
Caused by: java.util.concurrent.ExecutionException: java.io.EOFException: Cannot seek to negative offset
  at java.util.concurrent.FutureTask.report(FutureTask.java:122)
  at java.util.concurrent.FutureTask.get(FutureTask.java:192)
  at org.apache.carbondata.core.datastore.BlockIndexStore.fillLoadedBlocks(BlockIndexStore.java:254)
  ... 21 more
Caused by: java.io.EOFException: Cannot seek to negative offset
  at org.apache.hadoop.hdfs.DFSInputStream.seek(DFSInputStream.java:1527)
  at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:62)
  at org.apache.carbondata.core.datastore.impl.DFSFileHolderImpl.readLong(DFSFileHolderImpl.java:129)
  at org.apache.carbondata.core.util.CarbonUtil.calculateMetaSize(CarbonUtil.java:943)
  at org.apache.carbondata.core.datastore.AbstractBlockIndexStoreCache.checkAndLoadTableBlocks(AbstractBlockIndexStoreCache.java:93)
  at org.apache.carbondata.core.datastore.BlockIndexStore.loadBlock(BlockIndexStore.java:304)
  at org.apache.carbondata.core.datastore.BlockIndexStore.get(BlockIndexStore.java:109)
  at org.apache.carbondata.core.datastore.BlockIndexStore$BlockLoaderThread.call(BlockIndexStore.java:294)
  at org.apache.carbondata.core.datastore.BlockIndexStore$BlockLoaderThread.call(BlockIndexStore.java:284)
  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
  ... 3 more

yixu2001
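For reference, the sequence described above written out as runnable spark-sql statements. This is a sketch only: the table and column names are taken from the report, and the stray trailing comma in the quoted update statement is assumed to be a transcription slip.

  -- repeated many times: rewrite ETL_date from the staging table, matching on his_id
  -- (trailing comma from the quoted statement dropped here, assuming it was a typo)
  update e_carbon.prod_inst_his_c a
     set (a.ETL_date) = (select b.etl_dateID
                           from cache_prod_inst_his_u b
                          where a.his_id = b.his_id);

  -- the full-table scan that became slower after the repeated updates
  select count(*) from e_carbon.prod_inst_his_c;

  -- the major compaction attempted afterwards; count(*) then fails as shown above
  ALTER TABLE e_carbon.prod_inst_his_c COMPACT 'MAJOR';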
Horizontal compaction is supposed to run automatically after every update, which should prevent this kind of slowdown. The log output will show whether horizontal compaction actually ran or not; when it runs, the accumulated update delta and delete delta blocks are merged together. One way to look at the table's segment state from SQL is sketched below.
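If the logs are not conclusive, the segment state can also be inspected with standard CarbonData DDL. The commands below are a sketch; the exact output columns and segment statuses vary between releases, and whether horizontal compaction runs at all is governed by the carbon.properties settings carbon.horizontal.compaction.enable, carbon.horizontal.update.compaction.threshold and carbon.horizontal.delete.compaction.threshold (assumed to be at their defaults here).

  -- list the table's segments and their status (e.g. Success, Compacted);
  -- after a compaction the source segments remain visible as Compacted until cleaned
  SHOW SEGMENTS FOR TABLE e_carbon.prod_inst_his_c;

  -- once a MAJOR compaction has finished successfully, remove the stale segments
  -- and delta files it left behind, then re-run the count(*)
  CLEAN FILES FOR TABLE e_carbon.prod_inst_his_c;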