[ https://issues.apache.org/jira/browse/CARBONDATA-1541?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] xubo245 updated CARBONDATA-1541: -------------------------------- Description: There are some errors when bad_records_action is IGNORE {code:java} 17/10/09 01:20:31 ERROR CarbonRowDataWriterProcessorStepImpl: [Executor task launch worker-0][partitionID:default_int_table_2ade496b-a9e8-4e7c-82bd-fb21c2e590eb] Failed for table: int_table in DataWriterProcessorStepImpl org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException: unable to generate the mdkey at org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.processBatch(CarbonRowDataWriterProcessorStepImpl.java:276) at org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.doExecute(CarbonRowDataWriterProcessorStepImpl.java:162) at org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.execute(CarbonRowDataWriterProcessorStepImpl.java:123) at org.apache.carbondata.processing.newflow.DataLoadExecutor.execute(DataLoadExecutor.java:51) at org.apache.carbondata.spark.rdd.NewCarbonDataLoadRDD$$anon$1.<init>(NewCarbonDataLoadRDD.scala:254) at org.apache.carbondata.spark.rdd.NewCarbonDataLoadRDD.internalCompute(NewCarbonDataLoadRDD.scala:229) at org.apache.carbondata.spark.rdd.CarbonRDD.compute(CarbonRDD.scala:62) {code} 1. 
When the table has only one column and the column type is INT, there is an error. Code: {code:java} test("Loading table: int, bad_records_action is IGNORE") { val fileLocation = s"$rootPath/integration/spark-common-test/src/test/resources/badrecords/intTest.csv" sql("drop table if exists int_table") sql("CREATE TABLE if not exists int_table(intField INT) STORED BY 'carbondata'") sql( s""" | LOAD DATA LOCAL INPATH '$fileLocation' | INTO TABLE int_table | OPTIONS('FILEHEADER' = 'intField','bad_records_logger_enable'='true','bad_records_action'='IGNORE') """.stripMargin) sql("select * from int_table").show() checkAnswer(sql("select * from int_table where intField = 1"), Seq(Row(1), Row(1))) sql("drop table if exists int_table") } {code} 2. When sort_columns is empty (''), there is an error: {code:java} test("sort_columns is null, error") { sql("drop table if exists sales") sql( """CREATE TABLE IF NOT EXISTS sales(ID BigInt, date Timestamp, country String, actual_price Double, Quantity int, sold_price Decimal(19,2)) STORED BY 'carbondata' TBLPROPERTIES('sort_columns'='')""") CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC, new File("./target/test/badRecords") .getCanonicalPath) CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd") var csvFilePath = s"$resourcesPath/badrecords/datasample.csv" sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE sales OPTIONS" + "('bad_records_logger_enable'='true','bad_records_action'='redirect', 'DELIMITER'=" + " ',', 'QUOTECHAR'= '\"')"); checkAnswer( sql("select count(*) from sales"), Seq(Row(2) ) ) } {code} The test code has been pushed to https://github.com/xubo245/carbondata/tree/badRecordAction {code:java} org.apache.carbondata.integration.spark.testsuite.dataload.LoadDataWithBadRecords {code} was: There are some errors when bad_records_action is IGNORE {code:java} 17/10/09 01:20:31 ERROR CarbonRowDataWriterProcessorStepImpl: [Executor 
task launch worker-0][partitionID:default_int_table_2ade496b-a9e8-4e7c-82bd-fb21c2e590eb] Failed for table: int_table in DataWriterProcessorStepImpl org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException: unable to generate the mdkey at org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.processBatch(CarbonRowDataWriterProcessorStepImpl.java:276) at org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.doExecute(CarbonRowDataWriterProcessorStepImpl.java:162) at org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.execute(CarbonRowDataWriterProcessorStepImpl.java:123) at org.apache.carbondata.processing.newflow.DataLoadExecutor.execute(DataLoadExecutor.java:51) at org.apache.carbondata.spark.rdd.NewCarbonDataLoadRDD$$anon$1.<init>(NewCarbonDataLoadRDD.scala:254) at org.apache.carbondata.spark.rdd.NewCarbonDataLoadRDD.internalCompute(NewCarbonDataLoadRDD.scala:229) at org.apache.carbondata.spark.rdd.CarbonRDD.compute(CarbonRDD.scala:62) {code} 1. When table only have one column and the column data is INT, there is an error: code: {code:java} test("Loading table: int, bad_records_action is IGNORE") { val fileLocation = s"$rootPath/integration/spark-common-test/src/test/resources/badrecords/intTest.csv" sql("drop table if exists int_table") sql("CREATE TABLE if not exists int_table(intField INT) STORED BY 'carbondata'") sql( s""" | LOAD DATA LOCAL INPATH '$fileLocation' | INTO TABLE int_table | OPTIONS('FILEHEADER' = 'intField','bad_records_logger_enable'='true','bad_records_action'='IGNORE') """.stripMargin) sql("select * from int_table").show() checkAnswer(sql("select * from int_table where intField = 1"), Seq(Row(1), Row(1))) sql("drop table if exists int_table") } {code} 2. 
when sort_columns is null, there is an error : {code:java} test("sort_columns is null, error") { sql("drop table if exists sales") sql( """CREATE TABLE IF NOT EXISTS sales(ID BigInt, date Timestamp, country String, actual_price Double, Quantity int, sold_price Decimal(19,2)) STORED BY 'carbondata' TBLPROPERTIES('sort_columns'='')""") CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC, new File("./target/test/badRecords") .getCanonicalPath) CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd") var csvFilePath = s"$resourcesPath/badrecords/datasample.csv" sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE sales OPTIONS" + "('bad_records_logger_enable'='true','bad_records_action'='redirect', 'DELIMITER'=" + " ',', 'QUOTECHAR'= '\"')"); checkAnswer( sql("select count(*) from sales"), Seq(Row(2) ) ) } {code} > There are some errors when bad_records_action is IGNORE > ------------------------------------------------------- > > Key: CARBONDATA-1541 > URL: https://issues.apache.org/jira/browse/CARBONDATA-1541 > Project: CarbonData > Issue Type: Bug > Components: data-load > Affects Versions: 1.1.1 > Reporter: xubo245 > Priority: Minor > Original Estimate: 240h > Remaining Estimate: 240h > > There are some errors when bad_records_action is IGNORE > {code:java} > 17/10/09 01:20:31 ERROR CarbonRowDataWriterProcessorStepImpl: [Executor task launch worker-0][partitionID:default_int_table_2ade496b-a9e8-4e7c-82bd-fb21c2e590eb] Failed for table: int_table in DataWriterProcessorStepImpl > org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException: unable to generate the mdkey > at org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.processBatch(CarbonRowDataWriterProcessorStepImpl.java:276) > at org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.doExecute(CarbonRowDataWriterProcessorStepImpl.java:162) > 
at org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl.execute(CarbonRowDataWriterProcessorStepImpl.java:123) > at org.apache.carbondata.processing.newflow.DataLoadExecutor.execute(DataLoadExecutor.java:51) > at org.apache.carbondata.spark.rdd.NewCarbonDataLoadRDD$$anon$1.<init>(NewCarbonDataLoadRDD.scala:254) > at org.apache.carbondata.spark.rdd.NewCarbonDataLoadRDD.internalCompute(NewCarbonDataLoadRDD.scala:229) > at org.apache.carbondata.spark.rdd.CarbonRDD.compute(CarbonRDD.scala:62) > {code} > > 1. When the table has only one column and the column type is INT, there is an error. > Code: > {code:java} > test("Loading table: int, bad_records_action is IGNORE") { > val fileLocation = s"$rootPath/integration/spark-common-test/src/test/resources/badrecords/intTest.csv" > sql("drop table if exists int_table") > sql("CREATE TABLE if not exists int_table(intField INT) STORED BY 'carbondata'") > sql( > s""" > | LOAD DATA LOCAL INPATH '$fileLocation' > | INTO TABLE int_table > | OPTIONS('FILEHEADER' = 'intField','bad_records_logger_enable'='true','bad_records_action'='IGNORE') > """.stripMargin) > sql("select * from int_table").show() > checkAnswer(sql("select * from int_table where intField = 1"), > Seq(Row(1), Row(1))) > sql("drop table if exists int_table") > } > {code} > 2. 
When sort_columns is empty (''), there is an error: > {code:java} > test("sort_columns is null, error") { > sql("drop table if exists sales") > sql( > """CREATE TABLE IF NOT EXISTS sales(ID BigInt, date Timestamp, country String, > actual_price Double, Quantity int, sold_price Decimal(19,2)) > STORED BY 'carbondata' > TBLPROPERTIES('sort_columns'='')""") > CarbonProperties.getInstance() > .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC, > new File("./target/test/badRecords") > .getCanonicalPath) > CarbonProperties.getInstance() > .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd") > var csvFilePath = s"$resourcesPath/badrecords/datasample.csv" > sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE sales OPTIONS" > + > "('bad_records_logger_enable'='true','bad_records_action'='redirect', 'DELIMITER'=" + > " ',', 'QUOTECHAR'= '\"')"); > checkAnswer( > sql("select count(*) from sales"), > Seq(Row(2) > ) > ) > } > {code} > The test code has been pushed to https://github.com/xubo245/carbondata/tree/badRecordAction > {code:java} > org.apache.carbondata.integration.spark.testsuite.dataload.LoadDataWithBadRecords > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029) |
Free forum by Nabble | Edit this page |