[ https://issues.apache.org/jira/browse/CARBONDATA-664?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Harsh Sharma updated CARBONDATA-664: ------------------------------------ Summary: Select queries fail when BAD_RECORDS_ACTION as FORCED is used in load query. (was: Select queries fail when BAD_RECORDS_ACTION is used in load query.) > Select queries fail when BAD_RECORDS_ACTION as FORCED is used in load query. > ---------------------------------------------------------------------------- > > Key: CARBONDATA-664 > URL: https://issues.apache.org/jira/browse/CARBONDATA-664 > Project: CarbonData > Issue Type: Bug > Components: data-query > Affects Versions: 1.0.0-incubating > Environment: Spark 1.6 > Reporter: Harsh Sharma > Labels: bug > Attachments: 100_olap_C20.csv, Driver Logs, Executor Logs > > > The scenario below works on Spark 2.1 but fails on Spark 1.6: > create table VMALL_DICTIONARY_INCLUDE (imei string,deviceInformationId int,MAC string,deviceColor string,device_backColor string,modelId string,marketName string,AMSize string,ROMSize string,CUPAudit string,CPIClocked string,series string,productionDate timestamp,bomCode string,internalModels string, deliveryTime string, channelsId string, channelsName string , deliveryAreaId string, deliveryCountry string, deliveryProvince string, deliveryCity string,deliveryDistrict string, deliveryStreet string, oxSingleNumber string, ActiveCheckTime string, ActiveAreaId string, ActiveCountry string, ActiveProvince string, Activecity string, ActiveDistrict string, ActiveStreet string, ActiveOperatorId string, Active_releaseId string, Active_EMUIVersion string, Active_operaSysVersion string, Active_BacVerNumber string, Active_BacFlashVer string, Active_webUIVersion string, Active_webUITypeCarrVer string,Active_webTypeDataVerNumber string, Active_operatorsVersion string, Active_phonePADPartitionedVersions string, Latest_YEAR int, Latest_MONTH int, Latest_DAY Decimal(30,10), Latest_HOUR string, 
Latest_areaId string, Latest_country string, Latest_province string, Latest_city string, Latest_district string, Latest_street string, Latest_releaseId string, Latest_EMUIVersion string, Latest_operaSysVersion string, Latest_BacVerNumber string, Latest_BacFlashVer string, Latest_webUIVersion string, Latest_webUITypeCarrVer string, Latest_webTypeDataVerNumber string, Latest_operatorsVersion string, Latest_phonePADPartitionedVersions string, Latest_operatorId string, gamePointDescription string,gamePointId double,contractNumber BigInt) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='imei,deviceInformationId,productionDate,gamePointId,Latest_DAY,contractNumber'); > LOAD DATA INPATH 'hdfs://hadoop-master:54311/data/100_olap_C20.csv' INTO table VMALL_DICTIONARY_INCLUDE options('DELIMITER'=',','QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,contractNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaSysVersion,Latest_BacVerNumber,Latest_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_operatorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointId,gamePointDescription'); > select sum(deviceinformationId) 
from VMALL_DICTIONARY_INCLUDE where deviceColor ='5Device Color' and modelId != '109' or Latest_DAY > '1234567890123540.0000000000' and contractNumber == '92233720368547800' or Active_operaSysVersion like 'Operating System Version' and gamePointId <=> '8.1366141918611E39' and deviceInformationId < '1000000' and productionDate not like '2016-07-01' and imei is null and Latest_HOUR is not null and channelsId <= '7' and Latest_releaseId >= '1' and Latest_MONTH between 6 and 8 and Latest_YEAR not between 2016 and 2017 and Latest_HOUR RLIKE '12' and gamePointDescription REGEXP 'Site' and imei in ('1AA1','1AA100','1AA10','1AA1000','1AA10000','1AA100000','1AA1000000','1AA100001','1AA100002','1AA100004','','NULL') and Active_BacVerNumber not in ('Background version number1','','null'); > This scenario results in the following exception: > Error: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 48.0 failed 4 times, most recent failure: Lost task 0.3 in stage 48.0 (TID 152, hadoop-master): java.lang.RuntimeException: java.util.concurrent.ExecutionException: java.lang.NullPointerException > at org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator.next(DetailQueryResultIterator.java:65) > at org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator.next(DetailQueryResultIterator.java:35) > at org.apache.carbondata.scan.result.iterator.ChunkRowIterator.<init>(ChunkRowIterator.java:43) > at org.apache.carbondata.hadoop.CarbonRecordReader.initialize(CarbonRecordReader.java:81) > at org.apache.carbondata.spark.rdd.CarbonScanRDD.compute(CarbonScanRDD.scala:194) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) > at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) > at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) > at org.apache.spark.scheduler.Task.run(Task.scala:89) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227) > at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.util.concurrent.ExecutionException: java.lang.NullPointerException > at java.util.concurrent.FutureTask.report(FutureTask.java:122) > at java.util.concurrent.FutureTask.get(FutureTask.java:192) > at 
org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator.next(DetailQueryResultIterator.java:52) > ... 34 more > Caused by: java.lang.NullPointerException > at org.apache.carbondata.scan.result.AbstractScannedResult.getDictionaryKeyIntegerArray(AbstractScannedResult.java:187) > at org.apache.carbondata.scan.result.impl.FilterQueryScannedResult.getDictionaryKeyIntegerArray(FilterQueryScannedResult.java:53) > at org.apache.carbondata.scan.collector.impl.DictionaryBasedResultCollector.collectData(DictionaryBasedResultCollector.java:111) > at org.apache.carbondata.scan.processor.impl.DataBlockIteratorImpl.next(DataBlockIteratorImpl.java:52) > at org.apache.carbondata.scan.processor.impl.DataBlockIteratorImpl.next(DataBlockIteratorImpl.java:33) > at org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator$1.call(DetailQueryResultIterator.java:78) > at org.apache.carbondata.scan.result.iterator.DetailQueryResultIterator$1.call(DetailQueryResultIterator.java:72) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > ... 3 more > Driver stacktrace: (state=,code=0) -- This message was sent by Atlassian JIRA (v6.3.4#6332) |
Free forum by Nabble | Edit this page |