[GitHub] [carbondata] nihal0107 commented on a change in pull request #3865: [CARBONDATA-3928] Handled the Strings which length is greater than 32000 as a bad record.

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[GitHub] [carbondata] nihal0107 commented on a change in pull request #3865: [CARBONDATA-3928] Handled the Strings which length is greater than 32000 as a bad record.

GitBox

nihal0107 commented on a change in pull request #3865:
URL: https://github.com/apache/carbondata/pull/3865#discussion_r474419263



##########
File path: integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
##########
@@ -145,47 +152,153 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach {
     sql("drop table if exists carbon_table")
   }
 
-  test("test insert / update with data more than 32000 characters") {
+  private def createTableAndLoadData (badRecordAction: String): Unit = {
+    BadRecordUtil.cleanBadRecordPath("default", "longerthan32kchar")
+    sql("CREATE TABLE longerthan32kchar(dim1 String, dim2 String, mes1 int) STORED AS carbondata")
+    sql(s"LOAD DATA LOCAL INPATH '$testdata' into table longerThan32kChar OPTIONS('FILEHEADER'='dim1,dim2,mes1', " +
+      s"'BAD_RECORDS_ACTION'='${badRecordAction}','BAD_RECORDS_LOGGER_ENABLE'='TRUE')")
+  }
+
+  test("test load / insert / update with data more than 32000 characters and bad record action as Redirect") {
+    createTableAndLoadData("REDIRECT")
+    var redirectCsvPath = BadRecordUtil
+      .getRedirectCsvPath("default", "longerthan32kchar", "0", "0")
+    assert(BadRecordUtil.checkRedirectedCsvContentAvailableInSource(testdata, redirectCsvPath))
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "true")
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "REDIRECT");
+    sql(s"insert into longerthan32kchar values('33000', '$longChar', 4)")
+    checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("ok", "hi", 1), Row("itsok", "hello", 2)))
+    redirectCsvPath = BadRecordUtil.getRedirectCsvPath("default", "longerthan32kchar", "1", "0")
+    var redirectedFileLineList = FileUtils.readLines(redirectCsvPath)
+    var iterator = redirectedFileLineList.iterator()
+    while (iterator.hasNext) {
+      assert(iterator.next().equals("33000,"+longChar+",4"))
+    }
+
+    // Update strings of length greater than 32000
+    sql(s"update longerthan32kchar set(longerthan32kchar.dim2)=('$longChar') " +
+      "where longerthan32kchar.mes1=1").show()
+    checkAnswer(sql("select * from longerthan32kchar"), Seq(Row("itsok", "hello", 2)))
+    redirectCsvPath = BadRecordUtil.getRedirectCsvPath("default", "longerthan32kchar", "0", "1")
+    redirectedFileLineList = FileUtils.readLines(redirectCsvPath)
+    iterator = redirectedFileLineList.iterator()
+    while (iterator.hasNext) {
+      assert(iterator.next().equals("ok,"+longChar+",1"))
+    }
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_ENABLE_BAD_RECORD_HANDLING_FOR_INSERT, "false")
+
+    // Insert longer string without converter step will throw exception
+    intercept[Exception] {

Review comment:
       We can't check it here because this is a system-generated exception, not a user-formatted exception.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]