[ https://issues.apache.org/jira/browse/CARBONDATA-2041?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

anubhav tarar reassigned CARBONDATA-2041:
-----------------------------------------

    Assignee: anubhav tarar

> Not able to load data into a partitioned table using insert overwrite
> ----------------------------------------------------------------------
>
>                 Key: CARBONDATA-2041
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-2041
>             Project: CarbonData
>          Issue Type: Bug
>          Components: data-load
>    Affects Versions: 1.3.0
>         Environment: spark 2.1
>            Reporter: Vandana Yadav
>            Assignee: anubhav tarar
>            Priority: Major
>         Attachments: 2000_UniqData.csv
>
> Not able to load data into a partitioned table using insert overwrite.
>
> Steps to reproduce:
>
> 1) Create a Hive table and load data into it:
>
> a) CREATE TABLE uniqdata_hive (CUST_ID int, CUST_NAME string, ACTIVE_EMUI_VERSION string, DOB timestamp, DOJ timestamp, BIGINT_COLUMN1 bigint, BIGINT_COLUMN2 bigint, DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10), Double_COLUMN1 double, Double_COLUMN2 double, INTEGER_COLUMN1 int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
>
> b) LOAD DATA LOCAL INPATH '/home/knoldus/Desktop/csv/TestData/Data/uniqdata/2000_UniqData.csv' INTO TABLE uniqdata_hive;
>
> 2) Create partitioned tables (both Hive and Carbon):
>
> a) CREATE TABLE uniqdata_string (CUST_ID int, CUST_NAME string, DOB timestamp, DOJ timestamp, BIGINT_COLUMN1 bigint, BIGINT_COLUMN2 bigint, DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10), Double_COLUMN1 double, Double_COLUMN2 double, INTEGER_COLUMN1 int) PARTITIONED BY (ACTIVE_EMUI_VERSION string) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES ('TABLE_BLOCKSIZE'='256 MB');
>
> b) CREATE TABLE uniqdata_hive_partition (CUST_ID int, CUST_NAME string, DOB timestamp, DOJ timestamp, BIGINT_COLUMN1 bigint, BIGINT_COLUMN2 bigint, DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10), Double_COLUMN1 double, Double_COLUMN2 double, INTEGER_COLUMN1 int) PARTITIONED BY (active_emui_version string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
>
> 3) Load data into the partitioned tables using insert overwrite:
>
> a) Load into the Hive partitioned table:
>
> insert overwrite table uniqdata_hive_partition partition (active_emui_version) select CUST_ID, CUST_NAME, DOB, DOJ, BIGINT_COLUMN1, BIGINT_COLUMN2, DECIMAL_COLUMN1, DECIMAL_COLUMN2, Double_COLUMN1, Double_COLUMN2, INTEGER_COLUMN1, active_emui_version from uniqdata_hive limit 10;
>
> Output: data is loaded into the table successfully.
>
> Validation query:
>
> select count(*) from uniqdata_hive_partition;
>
> Output:
>
> +------------+
> | count(1)   |
> +------------+
> | 10         |
> +------------+
>
> b) Load into the Carbon partitioned table:
>
> insert overwrite table uniqdata_string partition (active_emui_version) select CUST_ID, CUST_NAME, DOB, DOJ, BIGINT_COLUMN1, BIGINT_COLUMN2, DECIMAL_COLUMN1, DECIMAL_COLUMN2, Double_COLUMN1, Double_COLUMN2, INTEGER_COLUMN1, active_emui_version from uniqdata_hive limit 10;
>
> Expected Result: data should be loaded successfully.
>
> Actual Result:
>
> Error: org.apache.spark.SparkException: Job aborted. (state=,code=0)
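>
> Note: both inserts in step 3 use fully dynamic partitioning, which in Hive normally requires the session settings below. Since the Hive-side insert in step 3a succeeds, they were presumably already in effect; they are listed only for reproducibility (standard Hive settings; whether this environment needs them explicitly is an assumption):
>
> set hive.exec.dynamic.partition=true;
> set hive.exec.dynamic.partition.mode=nonstrict;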
>
> Logs:
>
> org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: partition spec is invalid; field active_emui_version does not exist or is empty;
>   at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:98)
>   at org.apache.spark.sql.hive.HiveExternalCatalog.createPartitions(HiveExternalCatalog.scala:842)
>   at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createPartitions(SessionCatalog.scala:679)
>   at org.apache.spark.sql.hive.CarbonSessionCatalog.createPartitions(CarbonSessionState.scala:155)
>   at org.apache.spark.sql.execution.command.AlterTableAddPartitionCommand.run(ddl.scala:361)
>   at org.apache.spark.sql.execution.datasources.DataSourceAnalysis$$anonfun$apply$1.org$apache$spark$sql$execution$datasources$DataSourceAnalysis$$anonfun$$refreshPartitionsCallback$1(DataSourceStrategy.scala:221)
>   at org.apache.spark.sql.execution.datasources.DataSourceAnalysis$$anonfun$apply$1$$anonfun$8.apply(DataSourceStrategy.scala:243)
>   at org.apache.spark.sql.execution.datasources.DataSourceAnalysis$$anonfun$apply$1$$anonfun$8.apply(DataSourceStrategy.scala:243)
>   at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:143)
>   at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:121)
>   at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:121)
>   at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
>   at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:121)
>   at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:101)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
>   at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
>   at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:87)
>   at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:87)
>   at org.apache.spark.sql.Dataset.<init>(Dataset.scala:185)
>   at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
>   at org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand.overwritePartition(CarbonLoadDataCommand.scala:830)
>   at org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand.loadDataWithPartition(CarbonLoadDataCommand.scala:635)
>   at org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand.loadData(CarbonLoadDataCommand.scala:447)
>   at org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand.processData(CarbonLoadDataCommand.scala:230)
>   at org.apache.spark.sql.execution.command.DataCommand.run(package.scala:71)
>   at org.apache.spark.sql.execution.command.management.CarbonInsertIntoCommand.processData(CarbonInsertIntoCommand.scala:48)
>   at org.apache.spark.sql.execution.command.DataCommand.run(package.scala:71)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
>   at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
>   at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:87)
>   at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:87)
>   at org.apache.spark.sql.Dataset.<init>(Dataset.scala:185)
>   at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
>   at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:592)
>   at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:699)
>   at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:220)
>   at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:163)
>   at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:160)
>   at java.security.AccessController.doPrivileged(Native Method)
>   at javax.security.auth.Subject.doAs(Subject.java:422)
>   at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
>   at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1.run(SparkExecuteStatementOperation.scala:173)
>   at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>   at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>   at java.lang.Thread.run(Thread.java:745)
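>
> A static-partition variant of the failing statement may help isolate the bug: the stack trace shows the failure inside the dynamic-partition path, where CarbonLoadDataCommand.overwritePartition triggers AlterTableAddPartitionCommand and Hive then reports that active_emui_version is missing or empty in the partition spec. The sketch below is diagnostic only; the partition value '1.0' is hypothetical and should be replaced with an actual ACTIVE_EMUI_VERSION value from 2000_UniqData.csv:
>
> insert overwrite table uniqdata_string partition (active_emui_version='1.0')
> select CUST_ID, CUST_NAME, DOB, DOJ, BIGINT_COLUMN1, BIGINT_COLUMN2, DECIMAL_COLUMN1, DECIMAL_COLUMN2, Double_COLUMN1, Double_COLUMN2, INTEGER_COLUMN1
> from uniqdata_hive
> where ACTIVE_EMUI_VERSION = '1.0'
> limit 10;
>
> If this variant succeeds, the failure is confined to dynamic partition resolution rather than to partitioned loads in general.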