    [ https://issues.apache.org/jira/browse/CARBONDATA-662?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15831750#comment-15831750 ]

sounak chakraborty commented on CARBONDATA-662:
-----------------------------------------------

Please re-verify. In the latest release the output below is correct.

1 row selected (0.204 seconds)
0: jdbc:hive2://localhost:10000> drop table uniqdata_date;
+---------+--+
| Result  |
+---------+--+
+---------+--+
No rows selected (0.215 seconds)
0: jdbc:hive2://localhost:10000> CREATE TABLE uniqdata_date (CUST_ID int,CUST_NAME String,ACTIVE_EMUI_VERSION string, DOB date, DOJ date, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 double,INTEGER_COLUMN1 int) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES ("TABLE_BLOCKSIZE"= "256 MB");
+---------+--+
| Result  |
+---------+--+
+---------+--+
No rows selected (0.115 seconds)
0: jdbc:hive2://localhost:10000> LOAD DATA INPATH 'hdfs://localhost:54310/sc/one_row.csv' into table uniqdata_date OPTIONS('FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
+---------+--+
| Result  |
+---------+--+
+---------+--+
No rows selected (0.913 seconds)
0: jdbc:hive2://localhost:10000> select count(CUST_NAME) from uniqdata_date;
+------+--+
| _c0  |
+------+--+
| 1    |
+------+--+
1 row selected (0.237 seconds)
0: jdbc:hive2://localhost:10000> show segments for table uniqdata_date;
+--------------------+----------+--------------------------+--------------------------+--+
| SegmentSequenceId  | Status   | Load Start Time          | Load End Time            |
+--------------------+----------+--------------------------+--------------------------+--+
| 0                  | Success  | 2017-01-20 19:00:48.798  | 2017-01-20 19:00:49.473  |
+--------------------+----------+--------------------------+--------------------------+--+
1 row selected (0.018 seconds)
0: jdbc:hive2://localhost:10000> LOAD DATA INPATH 'hdfs://localhost:54310/sc/one_row.csv' into table uniqdata_date OPTIONS('FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
+---------+--+
| Result  |
+---------+--+
+---------+--+
No rows selected (0.363 seconds)
0: jdbc:hive2://localhost:10000> select count(CUST_NAME) from uniqdata_date;
+------+--+
| _c0  |
+------+--+
| 2    |
+------+--+
1 row selected (0.219 seconds)
0: jdbc:hive2://localhost:10000> show segments for table uniqdata_date;
+--------------------+----------+--------------------------+--------------------------+--+
| SegmentSequenceId  | Status   | Load Start Time          | Load End Time            |
+--------------------+----------+--------------------------+--------------------------+--+
| 1                  | Success  | 2017-01-20 19:01:01.961  | 2017-01-20 19:01:02.167  |
| 0                  | Success  | 2017-01-20 19:00:48.798  | 2017-01-20 19:00:49.473  |
+--------------------+----------+--------------------------+--------------------------+--+
2 rows selected (0.017 seconds)
0: jdbc:hive2://localhost:10000> LOAD DATA INPATH 'hdfs://localhost:54310/sc/one_row.csv' into table uniqdata_date OPTIONS('FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
+---------+--+
| Result  |
+---------+--+
+---------+--+
No rows selected (0.396 seconds)
0: jdbc:hive2://localhost:10000> select count(CUST_NAME) from uniqdata_date;
+------+--+
| _c0  |
+------+--+
| 3    |
+------+--+
1 row selected (0.22 seconds)
0: jdbc:hive2://localhost:10000> show segments for table uniqdata_date;
+--------------------+----------+--------------------------+--------------------------+--+
| SegmentSequenceId  | Status   | Load Start Time          | Load End Time            |
+--------------------+----------+--------------------------+--------------------------+--+
| 2                  | Success  | 2017-01-20 19:01:20.479  | 2017-01-20 19:01:20.734  |
| 1                  | Success  | 2017-01-20 19:01:01.961  | 2017-01-20 19:01:02.167  |
| 0                  | Success  | 2017-01-20 19:00:48.798  | 2017-01-20 19:00:49.473  |
+--------------------+----------+--------------------------+--------------------------+--+
3 rows selected (0.014 seconds)
0: jdbc:hive2://localhost:10000> LOAD DATA INPATH 'hdfs://localhost:54310/sc/one_row.csv' into table uniqdata_date OPTIONS('FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
+---------+--+
| Result  |
+---------+--+
+---------+--+
No rows selected (0.357 seconds)
0: jdbc:hive2://localhost:10000> select count(CUST_NAME) from uniqdata_date;
+------+--+
| _c0  |
+------+--+
| 4    |
+------+--+
1 row selected (0.217 seconds)
0: jdbc:hive2://localhost:10000> show segments for table uniqdata_date;
+--------------------+----------+--------------------------+--------------------------+--+
| SegmentSequenceId  | Status   | Load Start Time          | Load End Time            |
+--------------------+----------+--------------------------+--------------------------+--+
| 3                  | Success  | 2017-01-20 19:01:30.826  | 2017-01-20 19:01:31.04   |
| 2                  | Success  | 2017-01-20 19:01:20.479  | 2017-01-20 19:01:20.734  |
| 1                  | Success  | 2017-01-20 19:01:01.961  | 2017-01-20 19:01:02.167  |
| 0                  | Success  | 2017-01-20 19:00:48.798  | 2017-01-20 19:00:49.473  |
+--------------------+----------+--------------------------+--------------------------+--+
4 rows selected (0.022 seconds)
0: jdbc:hive2://localhost:10000> LOAD DATA INPATH 'hdfs://localhost:54310/sc/one_row.csv' into table uniqdata_date OPTIONS('FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
+---------+--+
| Result  |
+---------+--+
+---------+--+
No rows selected (0.552 seconds)
0: jdbc:hive2://localhost:10000> select count(CUST_NAME) from uniqdata_date;
+------+--+
| _c0  |
+------+--+
| 5    |
+------+--+
1 row selected (0.233 seconds)
0: jdbc:hive2://localhost:10000> show segments for table uniqdata_date;
+--------------------+----------+--------------------------+--------------------------+--+
| SegmentSequenceId  | Status   | Load Start Time          | Load End Time            |
+--------------------+----------+--------------------------+--------------------------+--+
| 4                  | Success  | 2017-01-20 19:01:48.578  | 2017-01-20 19:01:48.899  |
| 3                  | Success  | 2017-01-20 19:01:30.826  | 2017-01-20 19:01:31.04   |
| 2                  | Success  | 2017-01-20 19:01:20.479  | 2017-01-20 19:01:20.734  |
| 1                  | Success  | 2017-01-20 19:01:01.961  | 2017-01-20 19:01:02.167  |
| 0                  | Success  | 2017-01-20 19:00:48.798  | 2017-01-20 19:00:49.473  |
+--------------------+----------+--------------------------+--------------------------+--+
5 rows selected (0.021 seconds)
0: jdbc:hive2://localhost:10000> LOAD DATA INPATH 'hdfs://localhost:54310/sc/one_row.csv' into table uniqdata_date OPTIONS('FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
+---------+--+
| Result  |
+---------+--+
+---------+--+
No rows selected (0.895 seconds)
0: jdbc:hive2://localhost:10000> select count(CUST_NAME) from uniqdata_date;
+------+--+
| _c0  |
+------+--+
| 6    |
+------+--+
1 row selected (0.203 seconds)
0: jdbc:hive2://localhost:10000> show segments for table uniqdata_date;
+--------------------+------------+--------------------------+--------------------------+--+
| SegmentSequenceId  | Status     | Load Start Time          | Load End Time            |
+--------------------+------------+--------------------------+--------------------------+--+
| 5                  | Success    | 2017-01-20 19:01:59.212  | 2017-01-20 19:01:59.493  |
| 4                  | Success    | 2017-01-20 19:01:48.578  | 2017-01-20 19:01:48.899  |
| 3                  | Compacted  | 2017-01-20 19:01:30.826  | 2017-01-20 19:01:31.04   |
| 2                  | Compacted  | 2017-01-20 19:01:20.479  | 2017-01-20 19:01:20.734  |
| 1                  | Compacted  | 2017-01-20 19:01:01.961  | 2017-01-20 19:01:02.167  |
| 0.1                | Success    | 2017-01-20 19:01:59.545  | 2017-01-20 19:01:59.852  |
| 0                  | Compacted  | 2017-01-20 19:00:48.798  | 2017-01-20 19:00:49.473  |
+--------------------+------------+--------------------------+--------------------------+--+
7 rows selected (0.018 seconds)
0: jdbc:hive2://localhost:10000> select count(CUST_NAME) from uniqdata_date;
+------+--+
| _c0  |
+------+--+
| 6    |
+------+--+
1 row selected (0.219 seconds)
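
For completeness, the merge does not have to wait for carbon.enable.auto.load.merge; it can be forced with the minor-compaction DDL and checked the same way. This is only a minimal sketch against the uniqdata_date table used in the session above, relying on the ALTER TABLE ... COMPACT and SHOW SEGMENTS statements documented for CarbonData:

ALTER TABLE uniqdata_date COMPACT 'MINOR';    -- force a minor compaction of the loaded segments
SHOW SEGMENTS FOR TABLE uniqdata_date;        -- a merged segment (e.g. 0.1) appears; its inputs show Status = Compacted
select count(CUST_NAME) from uniqdata_date;   -- the count must not change after compaction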
> Minor Compaction results of count of individual column is incorrect
> -------------------------------------------------------------------
>
>                 Key: CARBONDATA-662
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-662
>             Project: CarbonData
>          Issue Type: Bug
>          Components: data-load
>    Affects Versions: 1.0.0-incubating
>            Reporter: Vyom Rastogi
>            Assignee: ravikiran
>            Priority: Minor
>              Labels: spark1.6
>
> Carbon properties:
> carbon.allowed.compaction.days = 2
> carbon.enable.auto.load.merge = true
> carbon.compaction.level.threshold = 4,3
> carbon.numberof.preserve.segments = 2
> carbon.sort.file.buffer.size = 20
> carbon.leaf.node.size = 120000
> max.query.execution.time = 60
> carbon.number.of.cores.while.loading = 6
> carbon.number.of.cores = 4
> carbon.sort.size = 500000
>
> Steps for compaction:
> 1) Create table uniqdata:
> CREATE TABLE uniqdata (CUST_ID int,CUST_NAME String,ACTIVE_EMUI_VERSION string, DOB timestamp, DOJ timestamp, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 double,INTEGER_COLUMN1 int) STORED BY 'org.apache.carbondata.format';
> 2) Load data into uniqdata:
> LOAD DATA INPATH 'hdfs://hacluster/vyom/2000_UniqData.csv' into table uniqdata OPTIONS('DELIMITER'=',' , 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
> 3) Select the count of CUST_NAME filtered by CUST_ID:
> 0: jdbc:hive2://172.168.100.212:23040> select count( (CUST_NAME)) from uniqdata where CUST_ID=9000 ;
> +------+--+
> | _c0  |
> +------+--+
> | 1    |
> +------+--+
> 4) Load data into uniqdata:
> LOAD DATA INPATH 'hdfs://hacluster/vyom/2000_UniqData.csv' into table uniqdata OPTIONS('DELIMITER'=',' , 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
> 5) Select the count again:
> 0: jdbc:hive2://172.168.100.212:23040> select count( (CUST_NAME)) from uniqdata where CUST_ID=9000 ;
> +------+--+
> | _c0  |
> +------+--+
> | 2    |
> +------+--+
> 6) Load data into uniqdata:
> 0: jdbc:hive2://172.168.100.212:23040> LOAD DATA INPATH 'hdfs://hacluster/vyom/2000_UniqData.csv' into table uniqdata OPTIONS('DELIMITER'=',' , 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
> +---------+--+
> | Result  |
> +---------+--+
> +---------+--+
> No rows selected (0.327 seconds)
> 7) Select the count:
> 0: jdbc:hive2://172.168.100.212:23040> select count( (CUST_NAME)) from uniqdata where CUST_ID=9000 ;
> +------+--+
> | _c0  |
> +------+--+
> | 3    |
> +------+--+
> 8) Load data into uniqdata:
> 0: jdbc:hive2://172.168.100.212:23040> LOAD DATA INPATH 'hdfs://hacluster/vyom/2000_UniqData.csv' into table uniqdata OPTIONS('DELIMITER'=',' , 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
> +---------+--+
> | Result  |
> +---------+--+
> +---------+--+
> 9) Select the count:
> 0: jdbc:hive2://172.168.100.212:23040> select count( (CUST_NAME)) from uniqdata where CUST_ID=9000 ;
> +------+--+
> | _c0  |
> +------+--+
> | 4    |
> +------+--+
> 1 row selected (5.194 seconds)
> 10) Load data into uniqdata:
> 0: jdbc:hive2://172.168.100.212:23040> LOAD DATA INPATH 'hdfs://hacluster/vyom/2000_UniqData.csv' into table uniqdata OPTIONS('DELIMITER'=',' , 'QUOTECHAR'='"','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
> +---------+--+
> | Result  |
> +---------+--+
> +---------+--+
> No rows selected (0.342 seconds)
> 11) Select the count of CUST_NAME:
> 0: jdbc:hive2://172.168.100.212:23040> select count( (CUST_NAME)) from uniqdata where CUST_ID=9000 ;
> +------+--+
> | _c0  |
> +------+--+
> | 5    |
> +------+--+
> 1 row selected (5.19 seconds)
> 12) Show segments for table uniqdata:
> +--------------------+------------------+--------------------------+--------------------------+--+
> | SegmentSequenceId  | Status           | Load Start Time          | Load End Time            |
> +--------------------+------------------+--------------------------+--------------------------+--+
> | 5                  | Partial Success  | 2017-01-19 16:33:14.006  | 2017-01-19 16:33:14.186  |
> | 4                  | Partial Success  | 2017-01-19 16:32:49.484  | 2017-01-19 16:32:49.691  |
> | 3                  | Compacted        | 2017-01-19 16:32:41.044  | 2017-01-19 16:32:41.252  |
> | 2                  | Compacted        | 2017-01-19 16:32:36.952  | 2017-01-19 16:32:37.159  |
> | 1                  | Compacted        | 2017-01-19 16:32:33.251  | 2017-01-19 16:32:33.447  |
> | 0.1                | Success          | 2017-01-19 16:33:14.205  | 2017-01-19 16:33:14.531  |
> | 0                  | Compacted        | 2017-01-19 16:32:08.457  | 2017-01-19 16:32:08.657  |
> +--------------------+------------------+--------------------------+--------------------------+--+
> 13) Select the count of CUST_NAME filtered by CUST_ID. Ideally the count should be 6, but it comes back as 14:
> 0: jdbc:hive2://172.168.100.212:23040> select count( (CUST_NAME)) from uniqdata where CUST_ID=9000 ;
> +------+--+
> | _c0  |
> +------+--+
> | 14   |
> +------+--+
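
A follow-up check that may help isolate the wrong count, given a sketch only and assuming the 2000_UniqData.csv used above contains exactly one row with CUST_ID = 9000, so the filtered count should match the number of loads: compare the total and filtered counts, then physically remove the Compacted segments and re-run the query. If the filtered count falls back to the expected value after CLEAN FILES, the Compacted segments are being scanned in addition to the merged segment 0.1.

select count(*) from uniqdata;                              -- should be (number of loads) * 2000
select count(CUST_NAME) from uniqdata where CUST_ID=9000;   -- should equal the number of loads
CLEAN FILES FOR TABLE uniqdata;                             -- drop the segments marked Compacted
SHOW SEGMENTS FOR TABLE uniqdata;                           -- only 0.1 and the non-compacted segments remain
select count(CUST_NAME) from uniqdata where CUST_ID=9000;   -- re-check the filtered count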
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)