Login  Register

[GitHub] [carbondata] akashrn5 commented on a change in pull request #4129: [CARBONDATA-4179] Support renaming of complex columns (array/struct)

Posted by GitBox on Jun 03, 2021; 7:58am
URL: http://apache-carbondata-dev-mailing-list-archive.168.s1.nabble.com/GitHub-carbondata-akkio-97-opened-a-new-pull-request-4129-WIP-alter-rename-complex-types-tp108015p108596.html


akashrn5 commented on a change in pull request #4129:
URL: https://github.com/apache/carbondata/pull/4129#discussion_r637747237



##########
File path: integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AlterTableColumnRenameTestCase.scala
##########
@@ -48,6 +52,262 @@ class AlterTableColumnRenameTestCase extends QueryTest with BeforeAndAfterAll {
     sql("drop table simple_table")
   }
 
+  test("Rename more than one column at a time in one operation") {
+    sql("drop table if exists test_rename")
+    sql("CREATE TABLE test_rename (str struct<a:struct<b:int, d:int>, c:int>) STORED AS carbondata")
+    sql("insert into test_rename values(named_struct('a11',named_struct('b2',12,'d',12), 'c', 12))")
+    sql("alter table test_rename change str str22 struct<a11:struct<b2:int, d:int>, c:int>")
+    sql("insert into test_rename values(named_struct('a11',named_struct('b2',24,'d',24), 'c', 24))")
+
+    val rows = sql("select str22.a11.b2 from test_rename").collect()
+    assert(rows(0).equals(Row(12)) && rows(1).equals(Row(24)))
+    // check if old column names are still present
+    val ex1 = intercept[AnalysisException] {
+      sql("select str from test_rename").show(false)
+    }
+    assert(ex1.getMessage
+      .contains("cannot resolve '`str`' given input columns: [test_rename.str22]"))
+
+    val ex2 = intercept[AnalysisException] {
+      sql("select str.a from test_rename").show(false)
+    }
+    assert(ex2.getMessage
+      .contains("cannot resolve '`str.a`' given input columns: [test_rename.str22]"))
+
+    // check un-altered columns
+    val rows1 = sql("select str22.c from test_rename").collect()
+    val rows2 = sql("select str22.a11.d from test_rename").collect()
+    assert(rows1.sameElements(Array(Row(12), Row(24))))
+    assert(rows2.sameElements(Array(Row(12), Row(24))))
+  }
+
+  test("rename complex columns with invalid structure/duplicate names/Map type") {
+    sql("drop table if exists test_rename")
+    sql(
+      "CREATE TABLE test_rename (str struct<a:int,b:long>, str2 struct<a:int,b:long>, map1 " +
+      "map<string, string>, str3 struct<a:int, b:map<string, string>>) STORED AS carbondata")
+
+    val ex1 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change str str struct<a:array<int>,b:long>")
+    }
+    assert(ex1.getMessage
+      .contains(
+        "column rename operation failed: because datatypes of complex children cannot be altered"))
+
+    val ex2 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change str str struct<a:int,b:long,c:int>")
+    }
+    assert(ex2.getMessage
+      .contains(
+        "column rename operation failed: because number of children of old and new complex " +
+        "columns are not the same"))
+
+    val ex3 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change str str int")
+    }
+    assert(ex3.getMessage
+      .contains(
+        "column rename operation failed: because old and new complex columns are not compatible " +
+        "in structure"))
+
+    val ex4 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change str str struct<a:int,a:long>")
+    }
+    assert(ex4.getMessage
+      .contains(
+        "column rename operation failed: because duplicate columns are present"))
+
+    val ex5 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change str str2 struct<a:int,b:long>")
+    }
+    assert(ex5.getMessage
+      .contains(
+        "Column Rename Operation failed. New column name str2 already exists in table test_rename"))
+
+    val ex6 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change map1 map2 map<string, struct<a:int>>")
+    }
+    assert(ex6.getMessage
+      .contains("rename operation failed: cannot alter map type column"))
+
+    val ex7 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change str3 str33 struct<a:int, bc:map<string, string>>")
+    }
+    assert(ex7.getMessage
+      .contains(
+        "rename operation failed: cannot alter complex structure that includes map type column"))
+  }
+
+  def checkAnswerUtil1(df1: DataFrame, df2: DataFrame, df3: DataFrame) {
+    checkAnswer(df1, Seq(Row(Row(Row(2)))))
+    checkAnswer(df2, Seq(Row(Row(2))))
+    checkAnswer(df3, Seq(Row(2)))
+  }
+
+  def checkAnswerUtil2(df1: DataFrame, df2: DataFrame, df3: DataFrame) {
+    checkAnswer(df1, Seq(Row(Row(Row(2))), Row(Row(Row(3)))))
+    checkAnswer(df2, Seq(Row(Row(2)), Row(Row(3))))
+    checkAnswer(df3, Seq(Row(2), Row(3)))
+  }
+
+  test("test alter rename struct of (primitive/struct/array)") {
+    sql("drop table if exists test_rename")
+    sql("CREATE TABLE test_rename (str1 struct<a:int>, str2 struct<a:struct<b:int>>, str3 " +
+        "struct<a:struct<b:struct<c:int>>>, intfield int) STORED AS carbondata")
+    sql("insert into test_rename values(named_struct('a', 2), " +
+        "named_struct('a', named_struct('b', 2)), named_struct('a', named_struct('b', " +
+        "named_struct('c', 2))), 1)")
+
+    // rename parent column from str2 to str22 and read old rows
+    sql("alter table test_rename change str2 str22 struct<a:struct<b:int>>")
+    var df1 = sql("select str22 from test_rename")
+    var df2 = sql("select str22.a from test_rename")
+    var df3 = sql("select str22.a.b from test_rename")
+    assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1)
+    checkAnswerUtil1(df1, df2, df3)
+
+    // rename child column from a to a11
+    sql("alter table test_rename change str22 str22 struct<a11:struct<b:int>>")
+    df1 = sql("select str22 from test_rename")
+    df2 = sql("select str22.a11 from test_rename")
+    df3 = sql("select str22.a11.b from test_rename")
+    assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1)
+    checkAnswerUtil1(df1, df2, df3)
+
+    // rename parent column from str22 to str33
+    sql("alter table test_rename change str22 str33 struct<a11:struct<b:int>>")
+    df1 = sql("select str33 from test_rename")
+    df2 = sql("select str33.a11 from test_rename")
+    df3 = sql("select str33.a11.b from test_rename")
+    assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1)
+    checkAnswerUtil1(df1, df2, df3)
+
+    // insert new rows
+    sql("insert into test_rename values(named_struct('a', 3), " +
+        "named_struct('a', named_struct('b', 3)), named_struct('a', named_struct('b', " +
+        "named_struct('c', 3))), 2)")
+    df1 = sql("select str33 from test_rename")
+    df2 = sql("select str33.a11 from test_rename")
+    df3 = sql("select str33.a11.b from test_rename")
+    assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2)
+    checkAnswerUtil2(df1, df2, df3)
+
+    sql("alter table test_rename change str33 str33 struct<a11:struct<b11:int>>")
+    sql("alter table test_rename change str33 str33 struct<a22:struct<b11:int>>")
+    df1 = sql("select str33 from test_rename")
+    df2 = sql("select str33.a22 from test_rename")
+    df3 = sql("select str33.a22.b11 from test_rename")
+    assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2)
+    checkAnswerUtil2(df1, df2, df3)
+
+    val desc = sql("desc table test_rename").collect()
+    assert(desc(0)(0).equals("str1"))
+    assert(desc(1)(0).equals("str33"))
+    assert(desc(1)(1).equals("struct<a22:struct<b11:int>>"))
+    assert(desc(2)(0).equals("str3"))
+  }
+
+  test("test alter rename array of (primitive/array/struct)") {
+    sql("drop table if exists test_rename")
+    sql(
+      "CREATE TABLE test_rename (arr1 array<int>, arr2 array<array<int>>, arr3 array<string>, " +
+      "arr4 array<struct<a:int>>) STORED AS carbondata")
+    sql(
+      "insert into test_rename values (array(1,2,3), array(array(1,2),array(3,4)), array('hello'," +
+      "'world'), array(named_struct('a',45)))")
+
+    sql("alter table test_rename change arr1 arr11 array<int>")

Review comment:
       Remove this sql statement if it is not used, and check all other places for similar unused statements and remove them as well.

##########
File path: integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AlterTableColumnRenameTestCase.scala
##########
@@ -48,6 +52,262 @@ class AlterTableColumnRenameTestCase extends QueryTest with BeforeAndAfterAll {
     sql("drop table simple_table")
   }
 
+  test("Rename more than one column at a time in one operation") {
+    sql("drop table if exists test_rename")
+    sql("CREATE TABLE test_rename (str struct<a:struct<b:int, d:int>, c:int>) STORED AS carbondata")
+    sql("insert into test_rename values(named_struct('a11',named_struct('b2',12,'d',12), 'c', 12))")
+    sql("alter table test_rename change str str22 struct<a11:struct<b2:int, d:int>, c:int>")
+    sql("insert into test_rename values(named_struct('a11',named_struct('b2',24,'d',24), 'c', 24))")
+
+    val rows = sql("select str22.a11.b2 from test_rename").collect()
+    assert(rows(0).equals(Row(12)) && rows(1).equals(Row(24)))
+    // check if old column names are still present
+    val ex1 = intercept[AnalysisException] {
+      sql("select str from test_rename").show(false)
+    }
+    assert(ex1.getMessage
+      .contains("cannot resolve '`str`' given input columns: [test_rename.str22]"))
+
+    val ex2 = intercept[AnalysisException] {
+      sql("select str.a from test_rename").show(false)
+    }
+    assert(ex2.getMessage
+      .contains("cannot resolve '`str.a`' given input columns: [test_rename.str22]"))
+
+    // check un-altered columns
+    val rows1 = sql("select str22.c from test_rename").collect()
+    val rows2 = sql("select str22.a11.d from test_rename").collect()
+    assert(rows1.sameElements(Array(Row(12), Row(24))))
+    assert(rows2.sameElements(Array(Row(12), Row(24))))
+  }
+
+  test("rename complex columns with invalid structure/duplicate names/Map type") {
+    sql("drop table if exists test_rename")
+    sql(
+      "CREATE TABLE test_rename (str struct<a:int,b:long>, str2 struct<a:int,b:long>, map1 " +
+      "map<string, string>, str3 struct<a:int, b:map<string, string>>) STORED AS carbondata")
+
+    val ex1 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change str str struct<a:array<int>,b:long>")
+    }
+    assert(ex1.getMessage
+      .contains(
+        "column rename operation failed: because datatypes of complex children cannot be altered"))
+
+    val ex2 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change str str struct<a:int,b:long,c:int>")
+    }
+    assert(ex2.getMessage
+      .contains(
+        "column rename operation failed: because number of children of old and new complex " +
+        "columns are not the same"))
+
+    val ex3 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change str str int")
+    }
+    assert(ex3.getMessage
+      .contains(
+        "column rename operation failed: because old and new complex columns are not compatible " +
+        "in structure"))
+
+    val ex4 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change str str struct<a:int,a:long>")
+    }
+    assert(ex4.getMessage
+      .contains(
+        "column rename operation failed: because duplicate columns are present"))
+
+    val ex5 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change str str2 struct<a:int,b:long>")
+    }
+    assert(ex5.getMessage
+      .contains(
+        "Column Rename Operation failed. New column name str2 already exists in table test_rename"))
+
+    val ex6 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change map1 map2 map<string, struct<a:int>>")
+    }
+    assert(ex6.getMessage
+      .contains("rename operation failed: cannot alter map type column"))
+
+    val ex7 = intercept[ProcessMetaDataException] {
+      sql("alter table test_rename change str3 str33 struct<a:int, bc:map<string, string>>")
+    }
+    assert(ex7.getMessage
+      .contains(
+        "rename operation failed: cannot alter complex structure that includes map type column"))
+  }
+
+  def checkAnswerUtil1(df1: DataFrame, df2: DataFrame, df3: DataFrame) {
+    checkAnswer(df1, Seq(Row(Row(Row(2)))))
+    checkAnswer(df2, Seq(Row(Row(2))))
+    checkAnswer(df3, Seq(Row(2)))
+  }
+
+  def checkAnswerUtil2(df1: DataFrame, df2: DataFrame, df3: DataFrame) {
+    checkAnswer(df1, Seq(Row(Row(Row(2))), Row(Row(Row(3)))))
+    checkAnswer(df2, Seq(Row(Row(2)), Row(Row(3))))
+    checkAnswer(df3, Seq(Row(2), Row(3)))
+  }
+
+  test("test alter rename struct of (primitive/struct/array)") {
+    sql("drop table if exists test_rename")
+    sql("CREATE TABLE test_rename (str1 struct<a:int>, str2 struct<a:struct<b:int>>, str3 " +
+        "struct<a:struct<b:struct<c:int>>>, intfield int) STORED AS carbondata")
+    sql("insert into test_rename values(named_struct('a', 2), " +
+        "named_struct('a', named_struct('b', 2)), named_struct('a', named_struct('b', " +
+        "named_struct('c', 2))), 1)")
+
+    // rename parent column from str2 to str22 and read old rows
+    sql("alter table test_rename change str2 str22 struct<a:struct<b:int>>")
+    var df1 = sql("select str22 from test_rename")
+    var df2 = sql("select str22.a from test_rename")
+    var df3 = sql("select str22.a.b from test_rename")
+    assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1)
+    checkAnswerUtil1(df1, df2, df3)
+
+    // rename child column from a to a11
+    sql("alter table test_rename change str22 str22 struct<a11:struct<b:int>>")
+    df1 = sql("select str22 from test_rename")
+    df2 = sql("select str22.a11 from test_rename")
+    df3 = sql("select str22.a11.b from test_rename")
+    assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1)
+    checkAnswerUtil1(df1, df2, df3)
+
+    // rename parent column from str22 to str33
+    sql("alter table test_rename change str22 str33 struct<a11:struct<b:int>>")
+    df1 = sql("select str33 from test_rename")
+    df2 = sql("select str33.a11 from test_rename")
+    df3 = sql("select str33.a11.b from test_rename")
+    assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1)
+    checkAnswerUtil1(df1, df2, df3)
+
+    // insert new rows
+    sql("insert into test_rename values(named_struct('a', 3), " +
+        "named_struct('a', named_struct('b', 3)), named_struct('a', named_struct('b', " +
+        "named_struct('c', 3))), 2)")
+    df1 = sql("select str33 from test_rename")
+    df2 = sql("select str33.a11 from test_rename")
+    df3 = sql("select str33.a11.b from test_rename")
+    assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2)
+    checkAnswerUtil2(df1, df2, df3)
+
+    sql("alter table test_rename change str33 str33 struct<a11:struct<b11:int>>")
+    sql("alter table test_rename change str33 str33 struct<a22:struct<b11:int>>")
+    df1 = sql("select str33 from test_rename")
+    df2 = sql("select str33.a22 from test_rename")
+    df3 = sql("select str33.a22.b11 from test_rename")
+    assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2)
+    checkAnswerUtil2(df1, df2, df3)
+
+    val desc = sql("desc table test_rename").collect()
+    assert(desc(0)(0).equals("str1"))
+    assert(desc(1)(0).equals("str33"))
+    assert(desc(1)(1).equals("struct<a22:struct<b11:int>>"))
+    assert(desc(2)(0).equals("str3"))
+  }
+
+  test("test alter rename array of (primitive/array/struct)") {
+    sql("drop table if exists test_rename")
+    sql(
+      "CREATE TABLE test_rename (arr1 array<int>, arr2 array<array<int>>, arr3 array<string>, " +
+      "arr4 array<struct<a:int>>) STORED AS carbondata")
+    sql(
+      "insert into test_rename values (array(1,2,3), array(array(1,2),array(3,4)), array('hello'," +
+      "'world'), array(named_struct('a',45)))")
+
+    sql("alter table test_rename change arr1 arr11 array<int>")

Review comment:
       When you just call sql("..."), it returns a DataFrame. You are not using the return value or triggering any action on the DataFrame, so the call is of no use and you can remove it.




--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]