I have disabled the inverted index on all columns, but the table is still
about 50% larger than the equivalent Parquet table.
Size: 31 GB (Parquet) vs. 48 GB (CarbonData) for 424,000,000 records.
The CarbonData version is 1.3.
-- Per-device table stored in CarbonData format, partitioned by p_date.
-- Table properties:
--   NO_INVERTED_INDEX  : lists every non-partition column.
--   DICTIONARY_INCLUDE : lists the columns requested for dictionary encoding.
--
-- NOTE(review): F_id is declared twice below (once as bigint, once as string).
-- Duplicate column names are normally rejected by CREATE TABLE, so one of the
-- two presumably had a different name before the statement was anonymised.
-- Rename it and update the matching NO_INVERTED_INDEX entry at the same time.
CREATE TABLE growth.carbondata_m_device_distinct (
    A_id               bigint,
    app_name           string,
    app_id             int,
    platform           string,
    is_F               smallint,
    F_id               bigint,
    E_id               bigint,
    ut                 smallint,
    os                 string,
    os_api             int,
    os_version         string,
    rom                string,
    F_id               string,    -- NOTE(review): duplicate of the bigint F_id above
    C_id               string,
    D_id               string,
    mc                 string,
    model              string,
    resolution         string,
    display_density    string,
    sdk_version        int,
    carrier            string,
    is_BBB             smallint,
    language           string,
    timezone           int,
    channel            string,
    access             string,
    package            string,
    display_name       string,
    app_version        string,
    version_code       int,
    client_ip          string,
    city_id            int,
    city_name          string,
    is_AAA             smallint,
    B_date_str         string,
    duration           int,
    A_times            int,
    A_date_str         string,
    activation_channel string,
    B_id               bigint
)
PARTITIONED BY (p_date string)
STORED BY 'carbondata'
TBLPROPERTIES (
    -- Mirrors the column list above one-to-one, including the repeated F_id;
    -- left unchanged until the duplicate column is renamed.
    'NO_INVERTED_INDEX'='A_id,app_name,app_id,platform,is_F,F_id,E_id,ut,os,os_api,os_version,rom,F_id,C_id,D_id,mc,model,resolution,display_density,sdk_version,carrier,is_BBB,language,timezone,channel,access,package,display_name,app_version,version_code,client_ip,city_id,city_name,is_AAA,B_date_str,duration,A_times,A_date_str,activation_channel,B_id',
    -- Each column is listed once; the original repeated platform and package.
    'DICTIONARY_INCLUDE'='app_name,platform,os_version,package,city_name,activation_channel,app_version,sdk_version,carrier,channel,display_name,os,version_code,display_density,access,app_id,timezone,language,is_F,is_BBB,is_AAA'
)
--
Sent from:
http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/