[jira] [Commented] (CARBONDATA-864) After adding column using alter query, when we put any column in "Dictionary Exclude" then perform select query on that column then it will throws an exception.

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[jira] [Commented] (CARBONDATA-864) After adding column using alter query, when we put any column in "Dictionary Exclude" then perform select query on that column then it will throws an exception.

Akash R Nilugal (Jira)

    [ https://issues.apache.org/jira/browse/CARBONDATA-864?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15960416#comment-15960416 ]

SWATI RAO commented on CARBONDATA-864:
--------------------------------------

When we put a column of string datatype in DICTIONARY_EXCLUDE, it displays "NULL" values when we perform a select query on it.

CREATE TABLE uniqdata (CUST_ID int,CUST_NAME String,ACTIVE_EMUI_VERSION string, DOB timestamp, DOJ timestamp, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 double,INTEGER_COLUMN1 int) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES ("TABLE_BLOCKSIZE"= "256 MB");



LOAD DATA INPATH 'HDFS_URL/BabuStore/Data/uniqdata/2000_UniqData.csv' into table uniqdata OPTIONS('DELIMITER'=',' , 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');LOAD DATA INPATH 'HDFS_URL/BabuStore/Data/uniqdata/2000_UniqData.csv' into table uniqdata OPTIONS('DELIMITER'=',' , 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');


ALTER TABLE uniqdata RENAME TO uniqdata1;


alter table uniqdata1 drop columns(ACTIVE_EMUI_VERSION);


alter table uniqdata1 add columns(ACTIVE_EMUI_VERSION string) TBLPROPERTIES('DICTIONARY_EXCLUDE'='ACTIVE_EMUI_VERSION', 'DEFAULT.VALUE.(ACTIVE_EMUI_VERSION'='abcd');


select distinct(ACTIVE_EMUI_VERSION) from uniqdata1 ;

Results:

0: jdbc:hive2://192.168.2.126:10000> alter table uniqdata1 drop columns(ACTIVE_EMUI_VERSION);
Error: java.lang.RuntimeException: Column active_emui_version does not exists in the table default.uniqdata1 (state=,code=0)
0: jdbc:hive2://192.168.2.126:10000>
0: jdbc:hive2://192.168.2.126:10000> alter table uniqdata1 add columns(ACTIVE_EMUI_VERSION string) TBLPROPERTIES('DICTIONARY_EXCLUDE'='ACTIVE_EMUI_VERSION', 'DEFAULT.VALUE.(ACTIVE_EMUI_VERSION'='abcd');
+---------+--+
| Result  |
+---------+--+
+---------+--+
No rows selected (0.308 seconds)
0: jdbc:hive2://192.168.2.126:10000> select distinct(ACTIVE_EMUI_VERSION) from uniqdata1 ;
+----------------------+--+
| ACTIVE_EMUI_VERSION  |
+----------------------+--+
| NULL                 |
+----------------------+--+
1 row selected (0.88 seconds)
0: jdbc:hive2://192.168.2.126:10000>


> After adding column using alter query, when we put any column in "Dictionary Exclude" then perform select query on that column then it will throws an exception.
> ----------------------------------------------------------------------------------------------------------------------------------------------------------------
>
>                 Key: CARBONDATA-864
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-864
>             Project: CarbonData
>          Issue Type: Bug
>    Affects Versions: 1.1.0-incubating
>         Environment: Spark2.1
>            Reporter: SWATI RAO
>         Attachments: 2000_UniqData.csv, error.png
>
>
> CREATE TABLE uniqdata (CUST_ID int,CUST_NAME String,ACTIVE_EMUI_VERSION string, DOB timestamp, DOJ timestamp, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 double,INTEGER_COLUMN1 int) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES ("TABLE_BLOCKSIZE"= "256 MB");
> LOAD DATA INPATH 'HDFS_URL/BabuStore/Data/uniqdata/2000_UniqData.csv' into table uniqdata OPTIONS('DELIMITER'=',' , 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
> ALTER TABLE uniqdata RENAME TO uniqdata1;
> alter table uniqdata1 add columns(dict int) TBLPROPERTIES('DICTIONARY_INCLUDE'='dict','DEFAULT.VALUE.dict'= '9999');
> select distinct(dict) from uniqdata1 ;
> When we perform a select query on the "DICTIONARY_INCLUDE" column, it works,
> but when we perform a select query on the "DICTIONARY_EXCLUDE" column, it throws an exception:
> alter table uniqdata1 add columns(nodict string) TBLPROPERTIES('DICTIONARY_EXCLUDE'='nodict', 'DEFAULT.VALUE.NoDict'= 'abcd');
> select distinct(nodict) from uniqdata1 ;
> 0: jdbc:hive2://192.168.2.126:10000> select distinct(nodict) from uniqdata1 ;
> Error: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 21.0 failed 1 times, most recent failure: Lost task 0.0 in stage 21.0 (TID 419, localhost, executor driver): java.lang.ArrayIndexOutOfBoundsException: 4186
> at org.apache.spark.sql.execution.vectorized.OnHeapColumnVector.putByteArray(OnHeapColumnVector.java:401)
> at org.apache.spark.sql.execution.vectorized.ColumnVector.putByteArray(ColumnVector.java:569)
> at org.apache.carbondata.spark.vectorreader.ColumnarVectorWrapper.putBytes(ColumnarVectorWrapper.java:77)
> at org.apache.carbondata.spark.vectorreader.ColumnarVectorWrapper.putBytes(ColumnarVectorWrapper.java:83)
> at org.apache.carbondata.core.scan.collector.impl.RestructureBasedVectorResultCollector.fillNoDictionaryData(RestructureBasedVectorResultCollector.java:167)
> at org.apache.carbondata.core.scan.collector.impl.RestructureBasedVectorResultCollector.fillDataForNonExistingDimensions(RestructureBasedVectorResultCollector.java:122)
> at org.apache.carbondata.core.scan.collector.impl.RestructureBasedVectorResultCollector.collectVectorBatch(RestructureBasedVectorResultCollector.java:97)
> at org.apache.carbondata.core.scan.processor.impl.DataBlockIteratorImpl.processNextBatch(DataBlockIteratorImpl.java:65)
> at org.apache.carbondata.core.scan.result.iterator.VectorDetailQueryResultIterator.processNextBatch(VectorDetailQueryResultIterator.java:46)
> at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.nextBatch(VectorizedCarbonRecordReader.java:246)
> at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.nextKeyValue(VectorizedCarbonRecordReader.java:140)
> at org.apache.carbondata.spark.rdd.CarbonScanRDD$$anon$1.hasNext(CarbonScanRDD.scala:222)
> at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.scan_nextBatch$(Unknown Source)
> at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.agg_doAggregateWithKeys$(Unknown Source)
> at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
> at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
> at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:377)
> at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
> at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:126)
> at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
> at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
> at org.apache.spark.scheduler.Task.run(Task.scala:99)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)