Apache CarbonData Dev Mailing List archive

[GitHub] [carbondata] akkio-97 opened a new pull request #4115: [WIP]

Classic

List

Threaded

110 messages Options

123456

GitBox

[GitHub] [carbondata] Indhumathi27 commented on a change in pull request #4115: [CARBONDATA-4163] Support adding of single-level complex columns(array/struct)

Indhumathi27 commented on a change in pull request #4115:
URL: https://github.com/apache/carbondata/pull/4115#discussion_r613746573

##########
File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala
##########
@@ -473,6 +528,11 @@ class AlterTableColumnSchemaGenerator(
for (elem <- alterTableModel.tableProperties) {
if (elem._1.toLowerCase.startsWith(defaultValueString)) {
if (col.getColumnName.equalsIgnoreCase(elem._1.substring(defaultValueString.length))) {
+ if (DataTypes.isArrayType(col.getDataType) || DataTypes.isStructType(col.getDataType) ||
+ DataTypes.isStructType(col.getDataType)) {

Review comment:
Same check twice for Struct type. Please check

##########
File path: integration/spark/src/test/scala/org/apache/carbondata/view/rewrite/TestAllOperationsOnMV.scala
##########
@@ -79,6 +90,12 @@ class TestAllOperationsOnMV extends QueryTest with BeforeAndAfterEach {
}.getMessage.contains("Cannot add columns in a materialized view table default.dm1")
}

+ test("test alter add complex column on MV table") {
+ intercept[ProcessMetaDataException] {

Review comment:
This test case already exists. Please remove duplicate test cases

##########
File path: index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithComplexArrayType.scala
##########
@@ -43,6 +43,83 @@ class TestSIWithComplexArrayType extends QueryTest with BeforeAndAfterEach {
sql("drop table if exists complextable5")
}

+ test("Test alter add array column before creating SI") {
+ sql("drop table if exists complextable")
+ sql("create table complextable (id string, country array<string>, name string) stored as carbondata")
+ sql("insert into complextable select 1,array('china', 'us'), 'b'")
+ sql("insert into complextable select 2,array('pak'), 'v'")
+
+ sql("drop index if exists index_11 on complextable")
+ sql(
+ "ALTER TABLE complextable ADD COLUMNS(arr2 array<string>)")
+ sql("insert into complextable select 3,array('china'), 'f',array('hello','world')")
+ sql("insert into complextable select 4,array('india'),'g',array('iron','man','jarvis')")
+
+ val result1 = sql("select * from complextable where array_contains(arr2,'iron')")
+ val result2 = sql("select * from complextable where arr2[0]='iron'")
+ sql("create index index_11 on table complextable(arr2) as 'carbondata'")
+ val df1 = sql(" select * from complextable where array_contains(arr2,'iron')")
+ val df2 = sql(" select * from complextable where arr2[0]='iron'")
+ if (!isFilterPushedDownToSI(df1.queryExecution.sparkPlan)) {
+ assert(false)
+ } else {
+ assert(true)
+ }
+ if (!isFilterPushedDownToSI(df2.queryExecution.sparkPlan)) {
+ assert(false)
+ } else {
+ assert(true)
+ }
+ val doNotHitSIDf = sql(" select * from complextable where array_contains(arr2,'iron') and array_contains(arr2,'man')")
+ if (isFilterPushedDownToSI(doNotHitSIDf.queryExecution.sparkPlan)) {
+ assert(false)
+ } else {
+ assert(true)
+ }
+ checkAnswer(result1, df1)
+ checkAnswer(result2, df2)
+ }
+
+ test("Test alter add array column after creating SI") {

Review comment:
I think this testcase validation is not required, as it is smiiliar to testcases that you have added in TestAlterTableAddColumns. Add column on maintable wont affect SI / MV, as they are created with other columns. you can remove those test cases.

##########
File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala
##########
@@ -275,6 +275,64 @@ class AlterTableColumnSchemaGenerator(
allColumns ++= Seq(columnSchema)
}
newCols ++= Seq(columnSchema)
+ if (DataTypes.isArrayType(columnSchema.getDataType)) {
+ columnSchema.setNumberOfChild(field.children.size)
+ val childField = field.children.get(0)
+ // TODO: should support adding multi-level complex columns: CARBONDATA-4164
+ if (childField.children != Some(null)) {
+ throw new UnsupportedOperationException("Only single-level complex types are allowed")

Review comment:
```suggestion
throw new UnsupportedOperationException("Alter add columns with single-level complex types are only allowed")
```

##########
File path: integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableAddColumns.scala
##########
@@ -136,6 +140,221 @@ class TestAlterTableAddColumns extends QueryTest with BeforeAndAfterAll {
sql(s"""DROP TABLE IF EXISTS ${ tableName }""")
}

+ test("Test adding of array of all primitive datatypes") {
+ sql("DROP TABLE IF EXISTS alter_com")
+ sql("CREATE TABLE alter_com(intfield int) STORED AS carbondata")
+ sql(
+ "ALTER TABLE alter_com ADD COLUMNS(arr1 array<short>, arr2 array<int>, arr3 " +
+ "array<long>, arr4 array<double>, arr5 array<decimal(8,2)>, arr6 array<string>, arr7 " +
+ "array<char(5)>, arr8 array<varchar(50)>, arr9 array<boolean>, arr10 array<date>, arr11 " +
+ "array<timestamp> )")
+ sql("desc table alter_com").show(false)
+ sql(
+ "insert into alter_com values(1,array(1,5),array(1,2),array(1,2,3),array(1.2d,2.3d),array" +
+ "(4.5,6.7),array('hello','world'),array('a','bcd'),array('abcd','efg'),array(true,false)," +
+ "array('2017-02-01','2018-09-11'),array('2017-02-01 00:01:00','2018-02-01 02:21:00') )")
+ checkAnswer(sql(
+ "select * from alter_com"),
+ Seq(Row(1, make(Array(1, 5)),
+ make(Array(1, 2)),
+ make(Array(1, 2, 3)),
+ make(Array(1.2, 2.3)),
+ make(Array(java.math.BigDecimal.valueOf(4.5).setScale(2),
+ java.math.BigDecimal.valueOf(6.7).setScale(2))),
+ make(Array("hello", "world")),
+ make(Array("a", "bcd")),
+ make(Array("abcd", "efg")),
+ make(Array(true, false)),
+ make(Array(Date.valueOf("2017-02-01"), Date.valueOf("2018-09-11"))),
+ make(Array(Timestamp.valueOf("2017-02-01 00:01:00"),
+ Timestamp.valueOf("2018-02-01 02:21:00")))
+ )))
+ sql("DROP TABLE IF EXISTS alter_com")
+ }
+
+ test("Test adding of struct of all primitive datatypes") {
+ sql("DROP TABLE IF EXISTS alter_com")
+ sql(
+ "CREATE TABLE alter_com(intField INT, structField struct<a:short,b:int,c:long,d:double," +
+ "e:decimal(8,2),f:string,g:char(5),h:varchar(50),i:boolean,j:date,k:timestamp>) STORED AS " +
+ "carbondata")
+ sql(
+ "insert into alter_com values(1, named_struct('a',1,'b',2,'c',3,'d',1.23,'e',2.34,'f'," +
+ "'hello','g','abc','h','def','i',true,'j','2017-02-01','k','2018-02-01 02:00:00.0') ) ")
+ sql("DROP TABLE IF EXISTS alter_com")
+ }
+
+ def insertIntoTableForArrayType(): Unit = {
+ sql("insert into alter_com values(4,array(2),array(1,2,3,4),array('abc','def'))")
+ sql("insert into alter_com values(5,array(1,2),array(1), array('Hulk','Thor'))")
+ sql(
+ "insert into alter_com values(6,array(1,2,3),array(1234,8,33333),array('Iron','Man'," +
+ "'Jarvis'))")
+ }
+
+ def checkRestulForArrayType(): Unit = {
+ val totalRows = sql("select * from alter_com").collect()
+ val a = sql("select * from alter_com where array_contains(arr2,2)").collect
+ val b = sql("select * from alter_com where array_contains(arr3,'Thor')").collect
+ val c = sql("select * from alter_com where intField = 1").collect
+ assert(totalRows.size == 6)
+ assert(a.size == 1)
+ assert(b.size == 1)
+ // check default value for newly added array column that is index - 3 and 4
+ assert(c(0)(2) == null && c(0)(3) == null)
+ }
+
+ test("Test alter add for arrays enabling local dictionary") {
+ sql("DROP TABLE IF EXISTS alter_com")
+ sql(
+ "CREATE TABLE alter_com(intField INT, arr1 array<int>) " +
+ "STORED AS carbondata TBLPROPERTIES('local_dictionary_enable'='true')")
+
+ sql("insert into alter_com values(1,array(1) )")
+ sql("insert into alter_com values(2,array(9,0) )")
+ sql("insert into alter_com values(3,array(11,12,13) )")
+
+ sql(
+ "ALTER TABLE alter_com ADD COLUMNS(arr2 array<int>, arr3 array<string>) TBLPROPERTIES" +
+ "('LOCAL_DICTIONARY_INCLUDE'='arr3')")
+ val schema = sql("describe alter_com").collect()
+ assert(schema.size == 4)
+
+ // For the previous segments the default value for newly added array column is null
+ insertIntoTableForArrayType
+ checkRestulForArrayType
+ sql("DROP TABLE IF EXISTS alter_com")
+
+ }
+
+ test("Test alter add for arrays disabling local dictionary") {
+ sql("DROP TABLE IF EXISTS alter_com")
+ sql(
+ "CREATE TABLE alter_com(intField INT, arr1 array<int>) " +
+ "STORED AS carbondata TBLPROPERTIES('local_dictionary_enable'='false')")
+
+ sql("insert into alter_com values(1,array(1) )")
+ sql("insert into alter_com values(2,array(9,0) )")
+ sql("insert into alter_com values(3,array(11,12,13) )")
+ sql(
+ "ALTER TABLE alter_com ADD COLUMNS(arr2 array<int>, arr3 array<string>) TBLPROPERTIES" +
+ "('LOCAL_DICTIONARY_EXCLUDE'='arr3')")
+ val schema = sql("describe alter_com").collect()
+ assert(schema.size == 4)
+
+ // For the previous segments the default value for newly added array column is null
+ insertIntoTableForArrayType
+ checkRestulForArrayType
+ sql("DROP TABLE IF EXISTS alter_com")
+
+ }
+
+ def insertIntoTableForStructType(): Unit = {
+ sql(
+ "insert into alter_struct values(2, named_struct('id1', 'id2','name1','name2'), " +
+ "named_struct('a','id2','b', 'abc2'), 'hello world', 5, named_struct('c','id3'," +
+ "'d', 'abc3','e', 22), array(1,2,3) )")
+ sql(
+ "insert into alter_struct values(3, named_struct('id1', 'id3','name1','name3'), " +
+ "named_struct('a','id2.1','b', 'abc2.1'), 'India', 5, named_struct('c','id3.1'," +
+ "'d', 'abc3.1','e', 25), array(4,5) )")
+
+ }
+
+ def checkResultForStructType(): Unit = {
+ val totalRows = sql("select * from alter_struct").collect()
+ val a = sql("select * from alter_struct where struct1.a = 'id2' ").collect()
+ val b = sql(
+ "select * from alter_struct where struct1.a = 'id2' or struct2.c = 'id3.1' or " +
+ "array_contains(arr,5) ").collect()
+ val c = sql("select * from alter_struct where roll = 1").collect()
+
+ assert(totalRows.size == 3)
+ assert(a.size == 1)
+ assert(b.size == 2)
+ // check default value for newly added struct column
+ assert(c(0)(2) == null)
+ }
+
+ test("Test alter add for structs enabling local dictionary") {

Review comment:
modify this testcase as below. in testcase, keep create table with/without local dictionary and move all other steps to method and reuse. you can pass local dict enable/disable as param, to make changes in alter add.
Please handle for Array testcase also.

##########
File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala
##########
@@ -275,6 +275,64 @@ class AlterTableColumnSchemaGenerator(
allColumns ++= Seq(columnSchema)
}
newCols ++= Seq(columnSchema)
+ if (DataTypes.isArrayType(columnSchema.getDataType)) {
+ columnSchema.setNumberOfChild(field.children.size)
+ val childField = field.children.get(0)
+ // TODO: should support adding multi-level complex columns: CARBONDATA-4164
+ if (childField.children != Some(null)) {
+ throw new UnsupportedOperationException("Only single-level complex types are allowed")
+ }
+ val childSchema: ColumnSchema = TableNewProcessor.createColumnSchema(
+ childField,
+ new java.util.ArrayList[Encoding](),
+ isDimensionCol = true,
+ childField.precision,
+ childField.scale,
+ childField.schemaOrdinal + currentSchemaOrdinal,
+ alterTableModel.highCardinalityDims,
+ alterTableModel.databaseName.getOrElse(dbName),
+ isSortColumn(childField.name.getOrElse(childField.column)),
+ isVarcharColumn(childField.name.getOrElse(childField.column)))
+
+ if (childSchema.getDataType == DataTypes.VARCHAR) {
+ // put the new long string columns in 'longStringCols'
+ // and add them after old long string columns
+ longStringCols ++= Seq(childSchema)
+ } else {
+ allColumns ++= Seq(childSchema)
+ }
+ newCols ++= Seq(childSchema)
+ } else if (DataTypes.isStructType(columnSchema.getDataType)) {
+ val noOfChildren = field.children.get.size
+ columnSchema.setNumberOfChild(noOfChildren)
+ for (i <- 0 to noOfChildren - 1) {
+ val childField = field.children.get(i)
+ // TODO: should support adding multi-level complex columns: CARBONDATA-4164
+ if (childField.children != Some(null)) {
+ throw new UnsupportedOperationException("Only single-level complex types are allowed")

Review comment:
Same comment as above

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]

GitBox