[GitHub] [carbondata] KanakaKumar commented on a change in pull request #3181: [CARBONDATA-3356] Support decimal for json schema and Provide better exception for users to solve problem when carbonData DataSource read SDK files with varchar

View options: Classic | List | Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[GitHub] [carbondata] KanakaKumar commented on a change in pull request #3181: [CARBONDATA-3356] Support decimal for json schema and Provide better exception for users to solve problem when carbonData DataSource read SDK files with varchar

GitBox
KanakaKumar commented on a change in pull request #3181: [CARBONDATA-3356] Support decimal for json schema and Provide better exception for users to solve problem when carbonData DataSource read SDK files with varchar
URL: https://github.com/apache/carbondata/pull/3181#discussion_r292781692
 
 

 ##########
 File path: integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
 ##########
 @@ -38,6 +39,84 @@ import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}
 
 class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll {
 
+
+  var writerPath = new File(this.getClass.getResource("/").getPath
+          + "../../target/SparkCarbonFileFormat/SDKWriterOutput/").getCanonicalPath
+  //getCanonicalPath gives path with \, but the code expects /.
+  writerPath = writerPath.replace("\\", "/")
+
+  def buildTestData(rows: Int,
+                    sortColumns: List[String]): Any = {
+    val schema = new StringBuilder()
+            .append("[ \n")
+            .append("   {\"stringField\":\"string\"},\n")
+            .append("   {\"byteField\":\"byte\"},\n")
+            .append("   {\"shortField\":\"short\"},\n")
+            .append("   {\"intField\":\"int\"},\n")
+            .append("   {\"longField\":\"long\"},\n")
+            .append("   {\"doubleField\":\"double\"},\n")
+            .append("   {\"floatField\":\"float\"},\n")
+            .append("   {\"decimalField\":\"decimal(17,2)\"},\n")
+            .append("   {\"boolField\":\"boolean\"},\n")
+            .append("   {\"dateField\":\"DATE\"},\n")
+            .append("   {\"timeField\":\"TIMESTAMP\"},\n")
+            .append("   {\"varcharField\":\"varchar\"},\n")
+            .append("   {\"varcharField2\":\"varchar\"}\n")
+            .append("]")
+            .toString()
+
+    try {
+      val builder = CarbonWriter.builder()
+      val writer =
+        builder.outputPath(writerPath)
+                .sortBy(sortColumns.toArray)
+                .uniqueIdentifier(System.currentTimeMillis)
+                .withBlockSize(2)
+                .withCsvInput(Schema.parseJson(schema))
+                .writtenBy("TestNonTransactionalCarbonTable")
+                .build()
+      var i = 0
+      while (i < rows) {
+        writer.write(Array[String]("robot" + i,
+          String.valueOf(i / 100),
+          String.valueOf(i / 100),
+          String.valueOf(i),
+          String.valueOf(i),
+          String.valueOf(i),
+          String.valueOf(i),
+          String.valueOf(i),
+          "true",
+          "2019-03-02",
+          "2019-02-12 03:03:34",
+          "var1",
+          "var2"))
+        i += 1
+      }
+      writer.close()
+    } catch {
+      case ex: Throwable => throw new RuntimeException(ex)
+    }
+  }
+
+  test("Carbon DataSource read SDK data with varchar") {
+    import spark._
+    FileUtils.deleteDirectory(new File(writerPath))
+    val num = 10000
+    buildTestData(num, List("stringField", "intField"))
+    if (SparkUtil.isSparkVersionXandAbove("2.2")) {
+      sql("DROP TABLE IF EXISTS carbontable_varchar")
+      sql("DROP TABLE IF EXISTS carbontable_varchar2")
+      sql(s"CREATE TABLE carbontable_varchar USING CARBON LOCATION '$writerPath'")
+      val e = intercept[Exception] {
+        sql("SELECT COUNT(*) FROM carbontable_varchar").show()
+      }
+      assert(e.getMessage.contains("Datatype of the Column VARCHAR present in index file, is varchar and not same as datatype of the column with same name present in table, because carbon convert varchar of carbon to string of spark, please set long_string_columns for varchar column"))
 
 Review comment:
   Should we add these details as a note in the SDK guide?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[hidden email]


With regards,
Apache Git Services