akashrn5 commented on a change in pull request #3865: URL: https://github.com/apache/carbondata/pull/3865#discussion_r471918176 ########## File path: integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala ########## @@ -145,47 +150,162 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach { sql("drop table if exists carbon_table") } - test("test insert / update with data more than 32000 characters") { + test("test load / insert / update with data more than 32000 characters and bad record action as Redirect") { + val testdata =s"$resourcesPath/MoreThan32KChar.csv" + FileFactory.deleteAllFilesOfDir(new File(CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC))) + sql("CREATE TABLE longerthan32kchar(dim1 String, dim2 String, mes1 int) STORED AS carbondata") + sql(s"LOAD DATA LOCAL INPATH '$testdata' into table longerThan32kChar OPTIONS('FILEHEADER'='dim1,dim2,mes1', " + + s"'BAD_RECORDS_ACTION'='REDIRECT','BAD_RECORDS_LOGGER_ENABLE'='TRUE')") + var redirectCsvPath = getRedirectCsvPath("default", "longerthan32kchar", "0", "0") + assert(checkRedirectedCsvContentAvailableInSource(testdata, redirectCsvPath)) + val longChar: String = RandomStringUtils.randomAlphabetic(33000) + CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "true") - val testdata =s"$resourcesPath/32000char.csv" - sql("drop table if exists load32000chardata") - sql("drop table if exists load32000chardata_dup") - sql("CREATE TABLE load32000chardata(dim1 String, dim2 String, mes1 int) STORED AS carbondata") - sql("CREATE TABLE load32000chardata_dup(dim1 String, dim2 String, mes1 int) STORED AS carbondata") - sql(s"LOAD DATA LOCAL INPATH '$testdata' into table load32000chardata OPTIONS('FILEHEADER'='dim1,dim2,mes1')") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "REDIRECT"); + 
sql(s"insert into longerthan32kchar values('33000', '$longChar', 4)") + checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("ok", "hi", 1), Row("itsok", "hello", 2))) + redirectCsvPath = getRedirectCsvPath("default", "longerthan32kchar", "1", "0") + var redirectedFileLineList = FileUtils.readLines(redirectCsvPath) + var iterator = redirectedFileLineList.iterator() + while (iterator.hasNext) { + assert(iterator.next().equals("33000,"+longChar+",4")) + } + + // Update strings of length greater than 32000 + sql(s"update longerthan32kchar set(longerthan32kchar.dim2)=('$longChar') " + + "where longerthan32kchar.mes1=1").show() + checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("itsok", "hello", 2))) + redirectCsvPath = getRedirectCsvPath("default", "longerthan32kchar", "0", "1") + redirectedFileLineList = FileUtils.readLines(redirectCsvPath) + iterator = redirectedFileLineList.iterator() + while (iterator.hasNext) { + assert(iterator.next().equals("ok,"+longChar+",1")) + } + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "false") + + // Insert longer string without converter step will throw exception intercept[Exception] { - sql("insert into load32000chardata_dup select dim1,concat(load32000chardata.dim2,'aaaa'),mes1 from load32000chardata").show() + sql(s"insert into longerthan32kchar values('32000', '$longChar', 3)") } - sql(s"LOAD DATA LOCAL INPATH '$testdata' into table load32000chardata_dup OPTIONS('FILEHEADER'='dim1,dim2,mes1')") + + FileFactory.deleteAllFilesOfDir(new File(CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC))) + } + + test("test load / insert / update with data more than 32000 characters and bad record action as Force") { + val testdata =s"$resourcesPath/MoreThan32KChar.csv" + sql("CREATE TABLE longerthan32kchar(dim1 String, dim2 String, mes1 int) STORED AS carbondata") + sql(s"LOAD DATA LOCAL INPATH '$testdata' into table 
longerThan32kChar OPTIONS('FILEHEADER'='dim1,dim2,mes1', " + + s"'BAD_RECORDS_ACTION'='FORCE','BAD_RECORDS_LOGGER_ENABLE'='TRUE')") + checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("ok", "hi", 1), Row("itsok", "hello", 2), Row("32123", null, 3))) Review comment: move `testdata`, create and load command to a method and pass the bad record action as parameter, as its a common code between the test cases, code will be clean. ########## File path: integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala ########## @@ -145,47 +150,162 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach { sql("drop table if exists carbon_table") } - test("test insert / update with data more than 32000 characters") { + test("test load / insert / update with data more than 32000 characters and bad record action as Redirect") { + val testdata =s"$resourcesPath/MoreThan32KChar.csv" + FileFactory.deleteAllFilesOfDir(new File(CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC))) + sql("CREATE TABLE longerthan32kchar(dim1 String, dim2 String, mes1 int) STORED AS carbondata") + sql(s"LOAD DATA LOCAL INPATH '$testdata' into table longerThan32kChar OPTIONS('FILEHEADER'='dim1,dim2,mes1', " + + s"'BAD_RECORDS_ACTION'='REDIRECT','BAD_RECORDS_LOGGER_ENABLE'='TRUE')") + var redirectCsvPath = getRedirectCsvPath("default", "longerthan32kchar", "0", "0") + assert(checkRedirectedCsvContentAvailableInSource(testdata, redirectCsvPath)) + val longChar: String = RandomStringUtils.randomAlphabetic(33000) + CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "true") - val testdata =s"$resourcesPath/32000char.csv" - sql("drop table if exists load32000chardata") - sql("drop table if exists load32000chardata_dup") - sql("CREATE TABLE load32000chardata(dim1 String, dim2 String, mes1 int) STORED AS carbondata") - sql("CREATE 
TABLE load32000chardata_dup(dim1 String, dim2 String, mes1 int) STORED AS carbondata") - sql(s"LOAD DATA LOCAL INPATH '$testdata' into table load32000chardata OPTIONS('FILEHEADER'='dim1,dim2,mes1')") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "REDIRECT"); + sql(s"insert into longerthan32kchar values('33000', '$longChar', 4)") + checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("ok", "hi", 1), Row("itsok", "hello", 2))) + redirectCsvPath = getRedirectCsvPath("default", "longerthan32kchar", "1", "0") + var redirectedFileLineList = FileUtils.readLines(redirectCsvPath) + var iterator = redirectedFileLineList.iterator() + while (iterator.hasNext) { + assert(iterator.next().equals("33000,"+longChar+",4")) + } + + // Update strings of length greater than 32000 + sql(s"update longerthan32kchar set(longerthan32kchar.dim2)=('$longChar') " + + "where longerthan32kchar.mes1=1").show() + checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("itsok", "hello", 2))) + redirectCsvPath = getRedirectCsvPath("default", "longerthan32kchar", "0", "1") + redirectedFileLineList = FileUtils.readLines(redirectCsvPath) + iterator = redirectedFileLineList.iterator() + while (iterator.hasNext) { + assert(iterator.next().equals("ok,"+longChar+",1")) + } + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "false") + + // Insert longer string without converter step will throw exception intercept[Exception] { - sql("insert into load32000chardata_dup select dim1,concat(load32000chardata.dim2,'aaaa'),mes1 from load32000chardata").show() + sql(s"insert into longerthan32kchar values('32000', '$longChar', 3)") } - sql(s"LOAD DATA LOCAL INPATH '$testdata' into table load32000chardata_dup OPTIONS('FILEHEADER'='dim1,dim2,mes1')") + + FileFactory.deleteAllFilesOfDir(new File(CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC))) + } + + 
test("test load / insert / update with data more than 32000 characters and bad record action as Force") { + val testdata =s"$resourcesPath/MoreThan32KChar.csv" + sql("CREATE TABLE longerthan32kchar(dim1 String, dim2 String, mes1 int) STORED AS carbondata") + sql(s"LOAD DATA LOCAL INPATH '$testdata' into table longerThan32kChar OPTIONS('FILEHEADER'='dim1,dim2,mes1', " + + s"'BAD_RECORDS_ACTION'='FORCE','BAD_RECORDS_LOGGER_ENABLE'='TRUE')") + checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("ok", "hi", 1), Row("itsok", "hello", 2), Row("32123", null, 3))) + val longChar: String = RandomStringUtils.randomAlphabetic(33000) + + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "true") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "FORCE"); + sql(s"insert into longerthan32kchar values('33000', '$longChar', 4)") + checkAnswer(sql("select * from longerthan32kchar"), + Seq(Row("ok", "hi", 1), Row("itsok", "hello", 2), Row("32123", null, 3), Row("33000", null, 4))) + + // Update strings of length greater than 32000 + sql(s"update longerthan32kchar set(longerthan32kchar.dim2)=('$longChar') " + + "where longerthan32kchar.mes1=1").show() + checkAnswer(sql("select * from longerthan32kchar"), + Seq(Row("ok", null, 1), Row("itsok", "hello", 2), Row("32123", null, 3), Row("33000", null, 4))) + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "false") + + // Insert longer string without converter step will throw exception intercept[Exception] { - sql("update load32000chardata_dup set(load32000chardata_dup.dim2)=(select concat(load32000chardata.dim2,'aaaa') from load32000chardata)").show() + sql(s"insert into longerthan32kchar values('32000', '$longChar', 3)") } + } + + test("test load / insert / update with data more than 32000 characters and bad record action as Fail") { + val testdata 
=s"$resourcesPath/MoreThan32KChar.csv" + sql("CREATE TABLE longerthan32kchar(dim1 String, dim2 String, mes1 int) STORED AS carbondata") + intercept[Exception] { + sql(s"LOAD DATA LOCAL INPATH '$testdata' into table longerThan32kChar OPTIONS('FILEHEADER'='dim1,dim2,mes1', " + + s"'BAD_RECORDS_ACTION'='FAIL','BAD_RECORDS_LOGGER_ENABLE'='TRUE')") + } + val longChar: String = RandomStringUtils.randomAlphabetic(33000) + + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "true") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "FAIL"); + intercept[Exception] { + sql(s"insert into longerthan32kchar values('33000', '$longChar', 4)") + } + + // Update strings of length greater than 32000 + sql(s"insert into longerthan32kchar values('ok', 'hi', 1)") + intercept[Exception] { + sql(s"update longerthan32kchar set(longerthan32kchar.dim2)=('$longChar') " + + "where longerthan32kchar.mes1=1").show() + } + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "false") + + // Insert longer string without converter step will throw exception + intercept[Exception] { Review comment: same as above ########## File path: integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala ########## @@ -145,47 +150,162 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach { sql("drop table if exists carbon_table") } - test("test insert / update with data more than 32000 characters") { + test("test load / insert / update with data more than 32000 characters and bad record action as Redirect") { + val testdata =s"$resourcesPath/MoreThan32KChar.csv" + FileFactory.deleteAllFilesOfDir(new File(CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC))) + sql("CREATE TABLE longerthan32kchar(dim1 String, dim2 String, mes1 int) 
STORED AS carbondata") + sql(s"LOAD DATA LOCAL INPATH '$testdata' into table longerThan32kChar OPTIONS('FILEHEADER'='dim1,dim2,mes1', " + + s"'BAD_RECORDS_ACTION'='REDIRECT','BAD_RECORDS_LOGGER_ENABLE'='TRUE')") + var redirectCsvPath = getRedirectCsvPath("default", "longerthan32kchar", "0", "0") + assert(checkRedirectedCsvContentAvailableInSource(testdata, redirectCsvPath)) + val longChar: String = RandomStringUtils.randomAlphabetic(33000) + CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "true") - val testdata =s"$resourcesPath/32000char.csv" - sql("drop table if exists load32000chardata") - sql("drop table if exists load32000chardata_dup") - sql("CREATE TABLE load32000chardata(dim1 String, dim2 String, mes1 int) STORED AS carbondata") - sql("CREATE TABLE load32000chardata_dup(dim1 String, dim2 String, mes1 int) STORED AS carbondata") - sql(s"LOAD DATA LOCAL INPATH '$testdata' into table load32000chardata OPTIONS('FILEHEADER'='dim1,dim2,mes1')") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "REDIRECT"); + sql(s"insert into longerthan32kchar values('33000', '$longChar', 4)") + checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("ok", "hi", 1), Row("itsok", "hello", 2))) + redirectCsvPath = getRedirectCsvPath("default", "longerthan32kchar", "1", "0") + var redirectedFileLineList = FileUtils.readLines(redirectCsvPath) + var iterator = redirectedFileLineList.iterator() + while (iterator.hasNext) { + assert(iterator.next().equals("33000,"+longChar+",4")) + } + + // Update strings of length greater than 32000 + sql(s"update longerthan32kchar set(longerthan32kchar.dim2)=('$longChar') " + + "where longerthan32kchar.mes1=1").show() + checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("itsok", "hello", 2))) + redirectCsvPath = getRedirectCsvPath("default", "longerthan32kchar", "0", "1") + redirectedFileLineList = 
FileUtils.readLines(redirectCsvPath) + iterator = redirectedFileLineList.iterator() + while (iterator.hasNext) { + assert(iterator.next().equals("ok,"+longChar+",1")) + } + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "false") + + // Insert longer string without converter step will throw exception intercept[Exception] { - sql("insert into load32000chardata_dup select dim1,concat(load32000chardata.dim2,'aaaa'),mes1 from load32000chardata").show() + sql(s"insert into longerthan32kchar values('32000', '$longChar', 3)") } - sql(s"LOAD DATA LOCAL INPATH '$testdata' into table load32000chardata_dup OPTIONS('FILEHEADER'='dim1,dim2,mes1')") + + FileFactory.deleteAllFilesOfDir(new File(CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC))) + } + + test("test load / insert / update with data more than 32000 characters and bad record action as Force") { + val testdata =s"$resourcesPath/MoreThan32KChar.csv" + sql("CREATE TABLE longerthan32kchar(dim1 String, dim2 String, mes1 int) STORED AS carbondata") + sql(s"LOAD DATA LOCAL INPATH '$testdata' into table longerThan32kChar OPTIONS('FILEHEADER'='dim1,dim2,mes1', " + + s"'BAD_RECORDS_ACTION'='FORCE','BAD_RECORDS_LOGGER_ENABLE'='TRUE')") + checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("ok", "hi", 1), Row("itsok", "hello", 2), Row("32123", null, 3))) + val longChar: String = RandomStringUtils.randomAlphabetic(33000) + + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "true") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "FORCE"); + sql(s"insert into longerthan32kchar values('33000', '$longChar', 4)") + checkAnswer(sql("select * from longerthan32kchar"), + Seq(Row("ok", "hi", 1), Row("itsok", "hello", 2), Row("32123", null, 3), Row("33000", null, 4))) + + // Update strings of length greater than 
32000 + sql(s"update longerthan32kchar set(longerthan32kchar.dim2)=('$longChar') " + + "where longerthan32kchar.mes1=1").show() + checkAnswer(sql("select * from longerthan32kchar"), + Seq(Row("ok", null, 1), Row("itsok", "hello", 2), Row("32123", null, 3), Row("33000", null, 4))) + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "false") + + // Insert longer string without converter step will throw exception intercept[Exception] { Review comment: assert for exception message ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/test/util/QueryTest.scala ########## @@ -207,6 +208,45 @@ class QueryTest extends PlanTest { } } } + + def getRedirectCsvPath(dbName: String, Review comment: may be add all these bad record methods to a scala object specific to bad record util functions and use in all places ########## File path: integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala ########## @@ -145,47 +150,162 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach { sql("drop table if exists carbon_table") } - test("test insert / update with data more than 32000 characters") { + test("test load / insert / update with data more than 32000 characters and bad record action as Redirect") { + val testdata =s"$resourcesPath/MoreThan32KChar.csv" + FileFactory.deleteAllFilesOfDir(new File(CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC))) + sql("CREATE TABLE longerthan32kchar(dim1 String, dim2 String, mes1 int) STORED AS carbondata") + sql(s"LOAD DATA LOCAL INPATH '$testdata' into table longerThan32kChar OPTIONS('FILEHEADER'='dim1,dim2,mes1', " + + s"'BAD_RECORDS_ACTION'='REDIRECT','BAD_RECORDS_LOGGER_ENABLE'='TRUE')") + var redirectCsvPath = getRedirectCsvPath("default", "longerthan32kchar", "0", "0") + assert(checkRedirectedCsvContentAvailableInSource(testdata, 
redirectCsvPath)) + val longChar: String = RandomStringUtils.randomAlphabetic(33000) + CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "true") - val testdata =s"$resourcesPath/32000char.csv" - sql("drop table if exists load32000chardata") - sql("drop table if exists load32000chardata_dup") - sql("CREATE TABLE load32000chardata(dim1 String, dim2 String, mes1 int) STORED AS carbondata") - sql("CREATE TABLE load32000chardata_dup(dim1 String, dim2 String, mes1 int) STORED AS carbondata") - sql(s"LOAD DATA LOCAL INPATH '$testdata' into table load32000chardata OPTIONS('FILEHEADER'='dim1,dim2,mes1')") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "REDIRECT"); + sql(s"insert into longerthan32kchar values('33000', '$longChar', 4)") + checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("ok", "hi", 1), Row("itsok", "hello", 2))) + redirectCsvPath = getRedirectCsvPath("default", "longerthan32kchar", "1", "0") + var redirectedFileLineList = FileUtils.readLines(redirectCsvPath) + var iterator = redirectedFileLineList.iterator() + while (iterator.hasNext) { + assert(iterator.next().equals("33000,"+longChar+",4")) + } + + // Update strings of length greater than 32000 + sql(s"update longerthan32kchar set(longerthan32kchar.dim2)=('$longChar') " + + "where longerthan32kchar.mes1=1").show() + checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("itsok", "hello", 2))) + redirectCsvPath = getRedirectCsvPath("default", "longerthan32kchar", "0", "1") + redirectedFileLineList = FileUtils.readLines(redirectCsvPath) + iterator = redirectedFileLineList.iterator() + while (iterator.hasNext) { + assert(iterator.next().equals("ok,"+longChar+",1")) + } + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "false") + + // Insert longer string without converter step will throw exception intercept[Exception] { 
Review comment: Please add an assertion that checks the exception message, since this change formats the exception message. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
Free forum by Nabble | Edit this page |