[GitHub] [carbondata] KanakaKumar commented on a change in pull request #3188: [CARBONDATA-3358] Support configurable decode for loading binary data, support base64 and Hex decode.

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[GitHub] [carbondata] KanakaKumar commented on a change in pull request #3188: [CARBONDATA-3358] Support configurable decode for loading binary data, support base64 and Hex decode.

GitBox

KanakaKumar commented on a change in pull request #3188:
URL: https://github.com/apache/carbondata/pull/3188#discussion_r283660030



##########
File path: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/binary/TestBinaryDataType.scala
##########
@@ -1146,6 +1142,188 @@ class TestBinaryDataType extends QueryTest with BeforeAndAfterAll {
         }
     }
 
+    test("Create table and load data with binary column for hive: test encode with base64 and streaming = true") {
+        sql("DROP TABLE IF EXISTS hivetable")
+        sql("DROP TABLE IF EXISTS carbontable")
+        sql(
+            s"""
+               | CREATE TABLE IF NOT EXISTS hivetable (
+               |    id int,
+               |    label boolean,
+               |    name string,
+               |    binaryField binary,
+               |    autoLabel boolean)
+               | row format delimited fields terminated by ','
+             """.stripMargin)
+        sql(
+            s"""
+               | LOAD DATA LOCAL INPATH '$resourcesPath/binaryDataBase64.csv'
+               | INTO TABLE hivetable
+             """.stripMargin)
+
+        sql(
+            s"""
+               | CREATE TABLE IF NOT EXISTS carbontable (
+               |    id int,
+               |    label boolean,
+               |    name string,
+               |    binaryField binary,
+               |    autoLabel boolean)
+               | STORED BY 'carbondata'
+               | tblproperties('streaming'='true')
+             """.stripMargin)
+        sql(
+            s"""
+               | LOAD DATA LOCAL INPATH '$resourcesPath/binaryDataBase64.csv'
+               | INTO TABLE carbontable
+               | OPTIONS('header'='false','DELIMITER'=',','binary_decoder'='baSe64')
+             """.stripMargin)
+
+        val hiveResult = sql("SELECT * FROM hivetable")
+        val carbonResult = sql("SELECT * FROM carbontable")
+        checkAnswer(hiveResult, carbonResult)
+
+        checkAnswer(sql("SELECT COUNT(*) FROM hivetable"), Seq(Row(3)))
+        try {
+            val carbonDF = carbonResult.collect()
+            assert(3 == carbonDF.length)
+            carbonDF.foreach { each =>
+                assert(5 == each.length)
+
+                assert(Integer.valueOf(each(0).toString) > 0)
+                assert(each(1).toString.equalsIgnoreCase("false") || (each(1).toString.equalsIgnoreCase("true")))
+                assert(each(2).toString.contains(".png"))
+
+                val value = each.getAs[Array[Byte]](3).slice(0, 10)
+                assert(new String(Base64.encodeBase64(value)).equals("iVBORw0KGgoAAA=="))
+                assert(each(4).toString.equalsIgnoreCase("false") || (each(4).toString.equalsIgnoreCase("true")))
+            }
+
+            val df = hiveResult.collect()
+            assert(3 == df.length)
+            df.foreach { each =>
+                assert(5 == each.length)
+
+                assert(Integer.valueOf(each(0).toString) > 0)
+                assert(each(1).toString.equalsIgnoreCase("false") || (each(1).toString.equalsIgnoreCase("true")))
+                assert(each(2).toString.contains(".png"))
+
+
+                val value = each.getAs[Array[Byte]](3).slice(0, 10)
+                assert(new String(Base64.encodeBase64(value)).equals("iVBORw0KGgoAAA=="))
+                assert(each(4).toString.equalsIgnoreCase("false") || (each(4).toString.equalsIgnoreCase("true")))
+            }
+        } catch {
+            case e: Exception =>
+                e.printStackTrace()
+                assert(false)
+        }
+    }
+
+    test("Create table and load data with binary column for hive: test encode without \u0001 and not base64") {
+        // Carbon will throw exception if the data is not base64 when carbon set binary_decoder is base64
+        // hive will save as original data if the data is not base64
+        sql("DROP TABLE IF EXISTS hivetable")
+        sql("DROP TABLE IF EXISTS carbontable")
+        sql(
+            s"""
+               | CREATE TABLE IF NOT EXISTS hivetable (
+               |    id int,
+               |    label boolean,
+               |    name string,
+               |    binaryField binary,
+               |    autoLabel boolean)
+               | row format delimited fields terminated by '|'
+             """.stripMargin)
+        sql(
+            s"""
+               | LOAD DATA LOCAL INPATH '$resourcesPath/binarystringdata2.csv'
+               | INTO TABLE hivetable
+             """.stripMargin)
+
+        sql(
+            s"""
+               | CREATE TABLE IF NOT EXISTS carbontable (
+               |    id int,
+               |    label boolean,
+               |    name string,
+               |    binaryField binary,
+               |    autoLabel boolean)
+               | STORED BY 'carbondata'
+             """.stripMargin)
+        val e = intercept[Exception] {
+            sql(
+                s"""
+                   | LOAD DATA LOCAL INPATH '$resourcesPath/binarystringdata2.csv'
+                   | INTO TABLE carbontable
+                   | OPTIONS('header'='false','DELIMITER'='|','binary_decoder'='baSe64')
+             """.stripMargin)
+        }
+        assert(e.getMessage.contains("Binary decoder is base64, but data is not base64"))
+    }
+
+    test("Create table and load data with binary column with Hex decode") {

Review comment:
       Add validation for invalid binary_decoder value




--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]