[GitHub] [carbondata] nihal0107 commented on a change in pull request #3865: [CARBONDATA-3928] Handled the Strings which length is greater than 32000 as a bad record.

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[GitHub] [carbondata] nihal0107 commented on a change in pull request #3865: [CARBONDATA-3928] Handled the Strings which length is greater than 32000 as a bad record.

GitBox

nihal0107 commented on a change in pull request #3865:
URL: https://github.com/apache/carbondata/pull/3865#discussion_r464908207



##########
File path: integration/spark/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala
##########
@@ -75,11 +75,10 @@ object CarbonScalaUtil {
         carbonLoadModel.getBinaryDecoder)
     } catch {
       case e: Exception =>
-        if (e.getMessage.startsWith(FieldConverter.stringLengthExceedErrorMsg)) {
-          val msg = s"Column ${carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable
-            .getCreateOrderColumn.get(idx).getColName} is too long," +
-            s" consider to use 'long_string_columns' table property."
-          LOGGER.error(msg, e)
+        if (e.getMessage.startsWith(CarbonCommonConstants.STRING_LENGTH_EXCEEDED_MESSAGE)) {
+          val msg = CarbonCommonConstants.STRING_LENGTH_EXCEEDED_MESSAGE.format(row,
+              carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable.getCreateOrderColumn
+                .get(idx).getColName)

Review comment:
       done.

##########
File path: integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
##########
@@ -170,21 +175,22 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach {
     sql("drop table if exists load32000bytes")
     sql("create table load32000bytes(name string) STORED AS carbondata")
     sql("insert into table load32000bytes select 'aaa'")
+    checkAnswer(sql("select count(*) from load32000bytes"), Seq(Row(1)))
 
-    assert(intercept[Exception] {
-      sql(s"load data local inpath '$testdata' into table load32000bytes OPTIONS ('FILEHEADER'='name')")
-    }.getMessage.contains("DataLoad failure: Dataload failed, String size cannot exceed 32000 bytes"))
+    // The load below inserts the value as null because a string longer than 32000 is treated as a bad record.
+    sql(s"load data local inpath '$testdata' into table load32000bytes OPTIONS ('FILEHEADER'='name')")
+    checkAnswer(sql("select count(*) from load32000bytes"), Seq(Row(2)))
+    checkAnswer(sql("select * from load32000bytes"), Seq(Row("aaa"), Row(null)))
 
     val source = scala.io.Source.fromFile(testdata, CarbonCommonConstants.DEFAULT_CHARSET)
     val data = source.mkString
 
+    // Insert throws an exception because it runs without the converter step.
     intercept[Exception] {
       sql(s"insert into load32000bytes values('$data')")
     }
 
-    intercept[Exception] {
-      sql(s"update load32000bytes set(name)= ('$data')").show()
-    }
+    sql(s"update load32000bytes set(name)= ('$data')").show()

Review comment:
       done.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]