[GitHub] [carbondata] nihal0107 commented on a change in pull request #3865: [CARBONDATA-3928] Handled the Strings which length is greater than 32000 as a bad record.

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[GitHub] [carbondata] nihal0107 commented on a change in pull request #3865: [CARBONDATA-3928] Handled the Strings which length is greater than 32000 as a bad record.

GitBox

nihal0107 commented on a change in pull request #3865:
URL: https://github.com/apache/carbondata/pull/3865#discussion_r471231039



##########
File path: integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
##########
@@ -154,13 +153,44 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach {
     sql("CREATE TABLE load32000chardata(dim1 String, dim2 String, mes1 int) STORED AS carbondata")
     sql("CREATE TABLE load32000chardata_dup(dim1 String, dim2 String, mes1 int) STORED AS carbondata")
     sql(s"LOAD DATA LOCAL INPATH '$testdata' into table load32000chardata OPTIONS('FILEHEADER'='dim1,dim2,mes1')")
-    intercept[Exception] {
-      sql("insert into load32000chardata_dup select dim1,concat(load32000chardata.dim2,'aaaa'),mes1 from load32000chardata").show()
-    }
+    checkAnswer(sql("select count(*) from load32000chardata"), Seq(Row(3)))
+    // Strings whose length is greater than 32000 are treated as bad records and inserted as null in the table
+    sql("insert into load32000chardata_dup select dim1,concat(load32000chardata.dim2,'aaaa'),mes1 from load32000chardata").show()
+    checkAnswer(sql("select count(*) from load32000chardata_dup"), Seq(Row(3)))
+    checkAnswer(sql("select * from load32000chardata_dup where mes1=3"), Seq(Row("32000", null, 3)))
     sql(s"LOAD DATA LOCAL INPATH '$testdata' into table load32000chardata_dup OPTIONS('FILEHEADER'='dim1,dim2,mes1')")
+    checkAnswer(sql("select count(*) from load32000chardata_dup"), Seq(Row(6)))
+    // Updating with strings of length greater than 32000 will invalidate the whole row.
+    sql("update load32000chardata_dup set(load32000chardata_dup.dim2)=(select concat(load32000chardata.dim2,'aaaa') " +
+      "from load32000chardata where load32000chardata.mes1=3) where load32000chardata_dup.mes1=3").show()
+    checkAnswer(sql("select count(*) from load32000chardata_dup"), Seq(Row(6)))
+    checkAnswer(sql("select * from load32000chardata_dup where mes1=3"), Seq(Row("32000", null, 3), Row("32000", null, 3)))
+
+    val longChar: String = RandomStringUtils.randomAlphabetic(33000)
+    // BAD_RECORD_ACTION = "REDIRECT"
+    CarbonProperties.getInstance()
+        .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "REDIRECT");
+    sql(s"insert into load32000chardata_dup values('32000', '$longChar', 3)")
+    checkAnswer(sql("select count(*) from load32000chardata_dup"), Seq(Row(6)))
+    checkAnswer(sql("select * from load32000chardata_dup where mes1=3"), Seq(Row("32000", null, 3), Row("32000", null, 3)))
+
+    // BAD_RECORD_ACTION = "IGNORE"
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "IGNORE");
+    sql(s"insert into load32000chardata_dup values('32000', '$longChar', 3)")
+    checkAnswer(sql("select count(*) from load32000chardata_dup"), Seq(Row(6)))
+    checkAnswer(sql("select * from load32000chardata_dup where mes1=3"), Seq(Row("32000", null, 3), Row("32000", null, 3)))
+
+    // BAD_RECORD_ACTION = "FAIL"
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "FAIL");
     intercept[Exception] {
-      sql("update load32000chardata_dup set(load32000chardata_dup.dim2)=(select concat(load32000chardata.dim2,'aaaa') from load32000chardata)").show()
+      sql(s"insert into load32000chardata_dup values('32000', '$longChar', 3)")
     }
+    checkAnswer(sql("select count(*) from load32000chardata_dup"), Seq(Row(6)))
+    checkAnswer(sql("select * from load32000chardata_dup where mes1=3"), Seq(Row("32000", null, 3), Row("32000", null, 3)))
+    CarbonProperties.getInstance()

Review comment:
       Done




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]