Indhumathi27 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644509268 ########## File path: docs/ddl-of-carbondata.md ########## @@ -819,33 +819,47 @@ Users can specify which columns to include and exclude for local dictionary gene - Invalid scenarios * Change of decimal precision from (10,2) to (10,5) is invalid as in this case only scale is increased but total number of digits remains the same. * Change the comment of the partition column + * Rename operation fails if the structure of the complex column has been altered. Please ensure the old and new columns are compatible with Review comment: Looks like the links under RENAME COLUMN / CHANGE COLUMN NAME/TYPE/COMMENT are not navigating to the description. Can you check and fix it? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
Indhumathi27 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644512076 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala ########## @@ -1132,10 +1135,60 @@ object CarbonParserUtil { } else if (scale < 0 || scale > 38) { throw new MalformedCarbonCommandException("Invalid value for scale") } - DataTypeInfo("decimal", precision, scale) + DataTypeInfo(columnName, "decimal", precision, scale) + case _ => + DataTypeInfo(columnName, + DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase) + } + } + + /** + * This method will return the instantiated DataTypeInfo by parsing the column + */ + def parseColumn(columnName: String, dataType: DataType, + values: Option[List[(Int, Int)]]): DataTypeInfo = { + // creates parent dataTypeInfo first + val dataTypeInfo = CarbonParserUtil.parseDataType( Review comment: ```suggestion val dataTypeName = DataTypeConverterUtil.convertToCarbonType(dataType.typeName).getName val dataTypeInfo = CarbonParserUtil.parseDataType(columnName, dataTypeName.toLowerCase,values) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
Indhumathi27 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644514061 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala ########## @@ -1132,10 +1135,60 @@ object CarbonParserUtil { } else if (scale < 0 || scale > 38) { throw new MalformedCarbonCommandException("Invalid value for scale") } - DataTypeInfo("decimal", precision, scale) + DataTypeInfo(columnName, "decimal", precision, scale) + case _ => + DataTypeInfo(columnName, + DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase) + } + } + + /** + * This method will return the instantiated DataTypeInfo by parsing the column + */ + def parseColumn(columnName: String, dataType: DataType, + values: Option[List[(Int, Int)]]): DataTypeInfo = { + // creates parent dataTypeInfo first + val dataTypeInfo = CarbonParserUtil.parseDataType( + columnName, + DataTypeConverterUtil + .convertToCarbonType(dataType.typeName) + .getName + .toLowerCase, + values) + // check which child type is present and create children dataTypeInfo accordingly + dataType match { + case arrayType: ArrayType => + val childType: DataType = arrayType.elementType + val childName = columnName + ".val" + val childValues = childType match { + case d: DecimalType => Some(List((d.precision, d.scale))) + case _ => None + } + var childTypeInfoList: List[DataTypeInfo] = null + val childDatatypeInfo = parseColumn(childName, childType, childValues) + childTypeInfoList = List(childDatatypeInfo) Review comment: Could directly set, dataTypeInfo.setChildren( List(childDatatypeInfo)), since childTypeInfoList is not used anywhere else -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644549681 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -279,6 +404,23 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( Seq.empty } + private def isComplexChild(columnSchema: ColumnSchema): Boolean = { + columnSchema.column_name.contains(CarbonCommonConstants.POINT) + } + + private def isChildOfOldColumn(columnSchemaName: String, oldColumnName: String): Boolean = { + columnSchemaName.startsWith(oldColumnName + CarbonCommonConstants.POINT) + } + + private def checkIfParentIsAltered(columnSchemaName: String): String = { Review comment: If str.a has been changed to str.b, the map only stores this change. Its children should also be changed, but that is not stored in the map, since it is an indirect change. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r637747237 ########## File path: integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AlterTableColumnRenameTestCase.scala ########## @@ -48,6 +52,262 @@ class AlterTableColumnRenameTestCase extends QueryTest with BeforeAndAfterAll { sql("drop table simple_table") } + test("Rename more than one column at a time in one operation") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str struct<a:struct<b:int, d:int>, c:int>) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',12,'d',12), 'c', 12))") + sql("alter table test_rename change str str22 struct<a11:struct<b2:int, d:int>, c:int>") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',24,'d',24), 'c', 24))") + + val rows = sql("select str22.a11.b2 from test_rename").collect() + assert(rows(0).equals(Row(12)) && rows(1).equals(Row(24))) + // check if old column names are still present + val ex1 = intercept[AnalysisException] { + sql("select str from test_rename").show(false) + } + assert(ex1.getMessage + .contains("cannot resolve '`str`' given input columns: [test_rename.str22]")) + + val ex2 = intercept[AnalysisException] { + sql("select str.a from test_rename").show(false) + } + assert(ex2.getMessage + .contains("cannot resolve '`str.a`' given input columns: [test_rename.str22]")) + + // check un-altered columns + val rows1 = sql("select str22.c from test_rename").collect() + val rows2 = sql("select str22.a11.d from test_rename").collect() + assert(rows1.sameElements(Array(Row(12), Row(24)))) + assert(rows2.sameElements(Array(Row(12), Row(24)))) + } + + test("rename complex columns with invalid structure/duplicate names/Map type") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (str struct<a:int,b:long>, str2 struct<a:int,b:long>, map1 " + + 
"map<string, string>, str3 struct<a:int, b:map<string, string>>) STORED AS carbondata") + + val ex1 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:array<int>,b:long>") + } + assert(ex1.getMessage + .contains( + "column rename operation failed: because datatypes of complex children cannot be altered")) + + val ex2 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,b:long,c:int>") + } + assert(ex2.getMessage + .contains( + "column rename operation failed: because number of children of old and new complex " + + "columns are not the same")) + + val ex3 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str int") + } + assert(ex3.getMessage + .contains( + "column rename operation failed: because old and new complex columns are not compatible " + + "in structure")) + + val ex4 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,a:long>") + } + assert(ex4.getMessage + .contains( + "column rename operation failed: because duplicate columns are present")) + + val ex5 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str2 struct<a:int,b:long>") + } + assert(ex5.getMessage + .contains( + "Column Rename Operation failed. 
New column name str2 already exists in table test_rename")) + + val ex6 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change map1 map2 map<string, struct<a:int>>") + } + assert(ex6.getMessage + .contains("rename operation failed: cannot alter map type column")) + + val ex7 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str3 str33 struct<a:int, bc:map<string, string>>") + } + assert(ex7.getMessage + .contains( + "rename operation failed: cannot alter complex structure that includes map type column")) + } + + def checkAnswerUtil1(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))))) + checkAnswer(df2, Seq(Row(Row(2)))) + checkAnswer(df3, Seq(Row(2))) + } + + def checkAnswerUtil2(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))), Row(Row(Row(3))))) + checkAnswer(df2, Seq(Row(Row(2)), Row(Row(3)))) + checkAnswer(df3, Seq(Row(2), Row(3))) + } + + test("test alter rename struct of (primitive/struct/array)") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str1 struct<a:int>, str2 struct<a:struct<b:int>>, str3 " + + "struct<a:struct<b:struct<c:int>>>, intfield int) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a', 2), " + + "named_struct('a', named_struct('b', 2)), named_struct('a', named_struct('b', " + + "named_struct('c', 2))), 1)") + + // rename parent column from str2 to str22 and read old rows + sql("alter table test_rename change str2 str22 struct<a:struct<b:int>>") + var df1 = sql("select str22 from test_rename") + var df2 = sql("select str22.a from test_rename") + var df3 = sql("select str22.a.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // rename child column from a to a11 + sql("alter table test_rename change str22 str22 struct<a11:struct<b:int>>") + df1 = sql("select str22 
from test_rename") + df2 = sql("select str22.a11 from test_rename") + df3 = sql("select str22.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // rename parent column from str22 to str33 + sql("alter table test_rename change str22 str33 struct<a11:struct<b:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // insert new rows + sql("insert into test_rename values(named_struct('a', 3), " + + "named_struct('a', named_struct('b', 3)), named_struct('a', named_struct('b', " + + "named_struct('c', 3))), 2)") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + sql("alter table test_rename change str33 str33 struct<a11:struct<b11:int>>") + sql("alter table test_rename change str33 str33 struct<a22:struct<b11:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a22 from test_rename") + df3 = sql("select str33.a22.b11 from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + val desc = sql("desc table test_rename").collect() + assert(desc(0)(0).equals("str1")) + assert(desc(1)(0).equals("str33")) + assert(desc(1)(1).equals("struct<a22:struct<b11:int>>")) + assert(desc(2)(0).equals("str3")) + } + + test("test alter rename array of (primitive/array/struct)") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (arr1 array<int>, arr2 array<array<int>>, arr3 array<string>, " + + "arr4 
array<struct<a:int>>) STORED AS carbondata") + sql( + "insert into test_rename values (array(1,2,3), array(array(1,2),array(3,4)), array('hello'," + + "'world'), array(named_struct('a',45)))") + + sql("alter table test_rename change arr1 arr11 array<int>") Review comment: remove this sql if not used and check in all other places and remove ########## File path: integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AlterTableColumnRenameTestCase.scala ########## @@ -48,6 +52,262 @@ class AlterTableColumnRenameTestCase extends QueryTest with BeforeAndAfterAll { sql("drop table simple_table") } + test("Rename more than one column at a time in one operation") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str struct<a:struct<b:int, d:int>, c:int>) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',12,'d',12), 'c', 12))") + sql("alter table test_rename change str str22 struct<a11:struct<b2:int, d:int>, c:int>") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',24,'d',24), 'c', 24))") + + val rows = sql("select str22.a11.b2 from test_rename").collect() + assert(rows(0).equals(Row(12)) && rows(1).equals(Row(24))) + // check if old column names are still present + val ex1 = intercept[AnalysisException] { + sql("select str from test_rename").show(false) + } + assert(ex1.getMessage + .contains("cannot resolve '`str`' given input columns: [test_rename.str22]")) + + val ex2 = intercept[AnalysisException] { + sql("select str.a from test_rename").show(false) + } + assert(ex2.getMessage + .contains("cannot resolve '`str.a`' given input columns: [test_rename.str22]")) + + // check un-altered columns + val rows1 = sql("select str22.c from test_rename").collect() + val rows2 = sql("select str22.a11.d from test_rename").collect() + assert(rows1.sameElements(Array(Row(12), Row(24)))) + assert(rows2.sameElements(Array(Row(12), Row(24)))) + } + + 
test("rename complex columns with invalid structure/duplicate names/Map type") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (str struct<a:int,b:long>, str2 struct<a:int,b:long>, map1 " + + "map<string, string>, str3 struct<a:int, b:map<string, string>>) STORED AS carbondata") + + val ex1 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:array<int>,b:long>") + } + assert(ex1.getMessage + .contains( + "column rename operation failed: because datatypes of complex children cannot be altered")) + + val ex2 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,b:long,c:int>") + } + assert(ex2.getMessage + .contains( + "column rename operation failed: because number of children of old and new complex " + + "columns are not the same")) + + val ex3 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str int") + } + assert(ex3.getMessage + .contains( + "column rename operation failed: because old and new complex columns are not compatible " + + "in structure")) + + val ex4 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,a:long>") + } + assert(ex4.getMessage + .contains( + "column rename operation failed: because duplicate columns are present")) + + val ex5 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str2 struct<a:int,b:long>") + } + assert(ex5.getMessage + .contains( + "Column Rename Operation failed. 
New column name str2 already exists in table test_rename")) + + val ex6 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change map1 map2 map<string, struct<a:int>>") + } + assert(ex6.getMessage + .contains("rename operation failed: cannot alter map type column")) + + val ex7 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str3 str33 struct<a:int, bc:map<string, string>>") + } + assert(ex7.getMessage + .contains( + "rename operation failed: cannot alter complex structure that includes map type column")) + } + + def checkAnswerUtil1(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))))) + checkAnswer(df2, Seq(Row(Row(2)))) + checkAnswer(df3, Seq(Row(2))) + } + + def checkAnswerUtil2(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))), Row(Row(Row(3))))) + checkAnswer(df2, Seq(Row(Row(2)), Row(Row(3)))) + checkAnswer(df3, Seq(Row(2), Row(3))) + } + + test("test alter rename struct of (primitive/struct/array)") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str1 struct<a:int>, str2 struct<a:struct<b:int>>, str3 " + + "struct<a:struct<b:struct<c:int>>>, intfield int) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a', 2), " + + "named_struct('a', named_struct('b', 2)), named_struct('a', named_struct('b', " + + "named_struct('c', 2))), 1)") + + // rename parent column from str2 to str22 and read old rows + sql("alter table test_rename change str2 str22 struct<a:struct<b:int>>") + var df1 = sql("select str22 from test_rename") + var df2 = sql("select str22.a from test_rename") + var df3 = sql("select str22.a.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // rename child column from a to a11 + sql("alter table test_rename change str22 str22 struct<a11:struct<b:int>>") + df1 = sql("select str22 
from test_rename") + df2 = sql("select str22.a11 from test_rename") + df3 = sql("select str22.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // rename parent column from str22 to str33 + sql("alter table test_rename change str22 str33 struct<a11:struct<b:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // insert new rows + sql("insert into test_rename values(named_struct('a', 3), " + + "named_struct('a', named_struct('b', 3)), named_struct('a', named_struct('b', " + + "named_struct('c', 3))), 2)") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + sql("alter table test_rename change str33 str33 struct<a11:struct<b11:int>>") + sql("alter table test_rename change str33 str33 struct<a22:struct<b11:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a22 from test_rename") + df3 = sql("select str33.a22.b11 from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + val desc = sql("desc table test_rename").collect() + assert(desc(0)(0).equals("str1")) + assert(desc(1)(0).equals("str33")) + assert(desc(1)(1).equals("struct<a22:struct<b11:int>>")) + assert(desc(2)(0).equals("str3")) + } + + test("test alter rename array of (primitive/array/struct)") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (arr1 array<int>, arr2 array<array<int>>, arr3 array<string>, " + + "arr4 
array<struct<a:int>>) STORED AS carbondata") + sql( + "insert into test_rename values (array(1,2,3), array(array(1,2),array(3,4)), array('hello'," + + "'world'), array(named_struct('a',45)))") + + sql("alter table test_rename change arr1 arr11 array<int>") Review comment: when u just give sql("..."), it will return a dataframe, u are not taking return value, not giving any action on dataframe, so its of no use, u can remove -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644576572 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala ########## @@ -1132,10 +1135,60 @@ object CarbonParserUtil { } else if (scale < 0 || scale > 38) { throw new MalformedCarbonCommandException("Invalid value for scale") } - DataTypeInfo("decimal", precision, scale) + DataTypeInfo(columnName, "decimal", precision, scale) + case _ => + DataTypeInfo(columnName, + DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase) + } + } + + /** + * This method will return the instantiated DataTypeInfo by parsing the column + */ + def parseColumn(columnName: String, dataType: DataType, + values: Option[List[(Int, Int)]]): DataTypeInfo = { + // creates parent dataTypeInfo first + val dataTypeInfo = CarbonParserUtil.parseDataType( Review comment: done ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala ########## @@ -1132,10 +1135,60 @@ object CarbonParserUtil { } else if (scale < 0 || scale > 38) { throw new MalformedCarbonCommandException("Invalid value for scale") } - DataTypeInfo("decimal", precision, scale) + DataTypeInfo(columnName, "decimal", precision, scale) + case _ => + DataTypeInfo(columnName, + DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase) + } + } + + /** + * This method will return the instantiated DataTypeInfo by parsing the column + */ + def parseColumn(columnName: String, dataType: DataType, + values: Option[List[(Int, Int)]]): DataTypeInfo = { + // creates parent dataTypeInfo first + val dataTypeInfo = CarbonParserUtil.parseDataType( + columnName, + DataTypeConverterUtil + .convertToCarbonType(dataType.typeName) + .getName + .toLowerCase, + values) + // check which child type is present and create children dataTypeInfo accordingly + dataType match { + case arrayType: ArrayType => + val 
childType: DataType = arrayType.elementType + val childName = columnName + ".val" + val childValues = childType match { + case d: DecimalType => Some(List((d.precision, d.scale))) + case _ => None + } + var childTypeInfoList: List[DataTypeInfo] = null + val childDatatypeInfo = parseColumn(childName, childType, childValues) + childTypeInfoList = List(childDatatypeInfo) Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r638754972 ########## File path: integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AlterTableColumnRenameTestCase.scala ########## @@ -48,6 +52,262 @@ class AlterTableColumnRenameTestCase extends QueryTest with BeforeAndAfterAll { sql("drop table simple_table") } + test("Rename more than one column at a time in one operation") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str struct<a:struct<b:int, d:int>, c:int>) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',12,'d',12), 'c', 12))") + sql("alter table test_rename change str str22 struct<a11:struct<b2:int, d:int>, c:int>") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',24,'d',24), 'c', 24))") + + val rows = sql("select str22.a11.b2 from test_rename").collect() + assert(rows(0).equals(Row(12)) && rows(1).equals(Row(24))) + // check if old column names are still present + val ex1 = intercept[AnalysisException] { + sql("select str from test_rename").show(false) + } + assert(ex1.getMessage + .contains("cannot resolve '`str`' given input columns: [test_rename.str22]")) + + val ex2 = intercept[AnalysisException] { + sql("select str.a from test_rename").show(false) + } + assert(ex2.getMessage + .contains("cannot resolve '`str.a`' given input columns: [test_rename.str22]")) + + // check un-altered columns + val rows1 = sql("select str22.c from test_rename").collect() + val rows2 = sql("select str22.a11.d from test_rename").collect() + assert(rows1.sameElements(Array(Row(12), Row(24)))) + assert(rows2.sameElements(Array(Row(12), Row(24)))) + } + + test("rename complex columns with invalid structure/duplicate names/Map type") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (str struct<a:int,b:long>, str2 struct<a:int,b:long>, map1 " + + 
"map<string, string>, str3 struct<a:int, b:map<string, string>>) STORED AS carbondata") + + val ex1 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:array<int>,b:long>") + } + assert(ex1.getMessage + .contains( + "column rename operation failed: because datatypes of complex children cannot be altered")) + + val ex2 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,b:long,c:int>") + } + assert(ex2.getMessage + .contains( + "column rename operation failed: because number of children of old and new complex " + + "columns are not the same")) + + val ex3 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str int") + } + assert(ex3.getMessage + .contains( + "column rename operation failed: because old and new complex columns are not compatible " + + "in structure")) + + val ex4 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,a:long>") + } + assert(ex4.getMessage + .contains( + "column rename operation failed: because duplicate columns are present")) + + val ex5 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str2 struct<a:int,b:long>") + } + assert(ex5.getMessage + .contains( + "Column Rename Operation failed. 
New column name str2 already exists in table test_rename")) + + val ex6 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change map1 map2 map<string, struct<a:int>>") + } + assert(ex6.getMessage + .contains("rename operation failed: cannot alter map type column")) + + val ex7 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str3 str33 struct<a:int, bc:map<string, string>>") + } + assert(ex7.getMessage + .contains( + "rename operation failed: cannot alter complex structure that includes map type column")) + } + + def checkAnswerUtil1(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))))) + checkAnswer(df2, Seq(Row(Row(2)))) + checkAnswer(df3, Seq(Row(2))) + } + + def checkAnswerUtil2(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))), Row(Row(Row(3))))) + checkAnswer(df2, Seq(Row(Row(2)), Row(Row(3)))) + checkAnswer(df3, Seq(Row(2), Row(3))) + } + + test("test alter rename struct of (primitive/struct/array)") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str1 struct<a:int>, str2 struct<a:struct<b:int>>, str3 " + + "struct<a:struct<b:struct<c:int>>>, intfield int) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a', 2), " + + "named_struct('a', named_struct('b', 2)), named_struct('a', named_struct('b', " + + "named_struct('c', 2))), 1)") + + // rename parent column from str2 to str22 and read old rows + sql("alter table test_rename change str2 str22 struct<a:struct<b:int>>") + var df1 = sql("select str22 from test_rename") + var df2 = sql("select str22.a from test_rename") + var df3 = sql("select str22.a.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // rename child column from a to a11 + sql("alter table test_rename change str22 str22 struct<a11:struct<b:int>>") + df1 = sql("select str22 
from test_rename") + df2 = sql("select str22.a11 from test_rename") + df3 = sql("select str22.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // rename parent column from str22 to str33 + sql("alter table test_rename change str22 str33 struct<a11:struct<b:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // insert new rows + sql("insert into test_rename values(named_struct('a', 3), " + + "named_struct('a', named_struct('b', 3)), named_struct('a', named_struct('b', " + + "named_struct('c', 3))), 2)") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + sql("alter table test_rename change str33 str33 struct<a11:struct<b11:int>>") + sql("alter table test_rename change str33 str33 struct<a22:struct<b11:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a22 from test_rename") + df3 = sql("select str33.a22.b11 from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + val desc = sql("desc table test_rename").collect() + assert(desc(0)(0).equals("str1")) + assert(desc(1)(0).equals("str33")) + assert(desc(1)(1).equals("struct<a22:struct<b11:int>>")) + assert(desc(2)(0).equals("str3")) + } + + test("test alter rename array of (primitive/array/struct)") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (arr1 array<int>, arr2 array<array<int>>, arr3 array<string>, " + + "arr4 
array<struct<a:int>>) STORED AS carbondata") + sql( + "insert into test_rename values (array(1,2,3), array(array(1,2),array(3,4)), array('hello'," + + "'world'), array(named_struct('a',45)))") + + sql("alter table test_rename change arr1 arr11 array<int>") Review comment: parent name is changed. We are using it to query right? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644591642 ########## File path: docs/ddl-of-carbondata.md ########## @@ -819,33 +819,47 @@ Users can specify which columns to include and exclude for local dictionary gene - Invalid scenarios * Change of decimal precision from (10,2) to (10,5) is invalid as in this case only scale is increased but total number of digits remains the same. * Change the comment of the partition column + * Rename operation fails if the structure of the complex column has been altered. Please ensure the old and new columns are compatible with Review comment: rename and change column name are both the same and they are being navigated to the same description -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644627016 ########## File path: docs/ddl-of-carbondata.md ########## @@ -819,33 +819,47 @@ Users can specify which columns to include and exclude for local dictionary gene - Invalid scenarios * Change of decimal precision from (10,2) to (10,5) is invalid as in this case only scale is increased but total number of digits remains the same. * Change the comment of the partition column + * Rename operation fails if the structure of the complex column has been altered. Please ensure the old and new columns are compatible with Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#issuecomment-853741821 Build Failed with Spark 2.4.5, Please check CI http://121.244.95.60:12602/job/ApacheCarbon_PR_Builder_2.4.5/3741/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#issuecomment-853770424 retest this please -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644710741 ########## File path: docs/ddl-of-carbondata.md ########## @@ -805,7 +805,7 @@ Users can specify which columns to include and exclude for local dictionary gene 2. If a column to be dropped has any Secondary index table created on them, drop column operation fails and the user will be asked to drop the corresponding SI table first before going for actual drop. - - #### CHANGE COLUMN NAME/TYPE/COMMENT + - #### change-column-nametype Review comment: this name doesn't look right, why changed? if changed also it should be name/type/comment right -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644711580 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala ########## @@ -1132,10 +1135,53 @@ object CarbonParserUtil { } else if (scale < 0 || scale > 38) { throw new MalformedCarbonCommandException("Invalid value for scale") } - DataTypeInfo("decimal", precision, scale) + DataTypeInfo(columnName, "decimal", precision, scale) + case _ => + DataTypeInfo(columnName, + DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase) + } + } + + /** + * This method will return the instantiated DataTypeInfo by parsing the column + */ + def parseColumn(columnName: String, dataType: DataType, + values: Option[List[(Int, Int)]]): DataTypeInfo = { + // creates parent dataTypeInfo first + val dataTypeName = DataTypeConverterUtil.convertToCarbonType(dataType.typeName).getName + val dataTypeInfo = CarbonParserUtil.parseDataType(columnName, dataTypeName.toLowerCase, values) + // check which child type is present and create children dataTypeInfo accordingly + dataType match { + case arrayType: ArrayType => + val childType: DataType = arrayType.elementType + val childName = columnName + ".val" + val childValues = childType match { + case d: DecimalType => Some(List((d.precision, d.scale))) + case _ => None + } + val childDatatypeInfo = parseColumn(childName, childType, childValues) + dataTypeInfo.setChildren(List(childDatatypeInfo)) + case structType: StructType => + var childTypeInfoList: List[DataTypeInfo] = null + for (childField <- structType) { + val childType = childField.dataType + val childName = columnName + CarbonCommonConstants.POINT + childField.name + val childValues = childType match { + case d: DecimalType => Some(List((d.precision, d.scale))) + case _ => None + } + val childDatatypeInfo = CarbonParserUtil.parseColumn(childName, childType, childValues) + if (childTypeInfoList == 
null) { + childTypeInfoList = List(childDatatypeInfo) + } else { + childTypeInfoList = childTypeInfoList :+ childDatatypeInfo + } + } + dataTypeInfo.setChildren(childTypeInfoList) case _ => - DataTypeInfo(DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase) } + // To-Do for map types Review comment: add jira ID here, todo should be together, else i don't think it identifies as TODO -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644723696 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -225,12 +264,46 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( newColumnProperties.put(CarbonCommonConstants.COLUMN_COMMENT, newColumnComment.get) columnSchema.setColumnProperties(newColumnProperties) } - addColumnSchema = columnSchema + addedTableColumnSchema = columnSchema + } else if (isComplexChild(columnSchema)) { + if (alteredColumnNamesMap.contains(columnSchemaName)) { + // matches exactly + val newComplexChildName = alteredColumnNamesMap(columnSchemaName) + columnSchema.setColumn_name(newComplexChildName) + isSchemaEntryRequired = true + } else { + val alteredParent = checkIfParentIsAltered(columnSchemaName) + /* + * Lets say, if complex schema is: str struct<a: int> + * and if parent column is changed from str -> str2 + * then its child name should also be changed from str.a -> str2.a + */ + if (alteredParent != null) { + val newParent = alteredColumnNamesMap(alteredParent) + val newComplexChildName = newParent + columnSchemaName + .split(alteredParent)(1) + columnSchema.setColumn_name(newComplexChildName) + isSchemaEntryRequired = true + } + } + } + // validate duplicate child columns + if (uniqueColumnSet.contains(columnSchema.getColumn_name)) { + throw new UnsupportedOperationException("Duplicate columns are present") + } + + // make a new schema evolution entry after column rename or datatype change + if (isSchemaEntryRequired) { + addedColumnsList ++= List(columnSchema) + deletedColumnsList ++= List(deletedColumnSchema) timeStamp = System.currentTimeMillis() - // make a new schema evolution entry after column rename or datatype change - schemaEvolutionEntry = AlterTableUtil - .addNewSchemaEvolutionEntry(timeStamp, addColumnSchema, 
deletedColumnSchema) + schemaEvolutionEntry = AlterTableUtil.addNewSchemaEvolutionEntry(schemaEvolutionEntry, Review comment: i don't see any reason to change this API, i think you have changed API and added null checks in `addNewSchemaEvolutionEntry` since in case of complex there can be multiple columns changed like child and parent. But just sending single column in List and adding null check is not good, here itself make the deleted and added column as List, incase of primitive it will be with single column, in case of complex when both parent and child changes, it will have multiple values. Please do the necessary changes -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644724511 ########## File path: integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala ########## @@ -357,18 +358,30 @@ object AlterTableUtil { /** * This method create a new SchemaEvolutionEntry and adds to SchemaEvolutionEntry List * - * @param addColumnSchema added new column schema - * @param deletedColumnSchema old column schema which is deleted + * @param addColumnSchema added new column schema + * @param deletedColumnSchema old column schema which is deleted + * @param addedColumnsList list of added column schemas + * @param deletedColumnsList list of deleted column schemas * @return */ def addNewSchemaEvolutionEntry( - timeStamp: Long, + schemaEvolutionEntry: SchemaEvolutionEntry, addColumnSchema: org.apache.carbondata.format.ColumnSchema, - deletedColumnSchema: org.apache.carbondata.format.ColumnSchema): SchemaEvolutionEntry = { - val schemaEvolutionEntry = new SchemaEvolutionEntry(timeStamp) - schemaEvolutionEntry.setAdded(List(addColumnSchema).asJava) - schemaEvolutionEntry.setRemoved(List(deletedColumnSchema).asJava) - schemaEvolutionEntry + deletedColumnSchema: org.apache.carbondata.format.ColumnSchema, Review comment: please handle this according to comment in command class -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644724857 ########## File path: integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala ########## @@ -1063,4 +1076,106 @@ object AlterTableUtil { } } } + + /** + * This method checks the structure of the old and new complex columns, and- + * 1. throws exception if the number of complex-levels in both columns does not match + * 2. throws exception if the number of children of both columns does not match + * 3. creates alteredColumnNamesMap: new_column_name -> datatype. Here new_column_name are those + * names of the columns that are altered. + * These maps will later be used while altering the table schema + */ + def validateComplexStructure(oldDimensionList: List[CarbonDimension], + newDimensionList: List[DataTypeInfo], + alteredColumnNamesMap: mutable.LinkedHashMap[String, String]): Unit = { + if (oldDimensionList == null && newDimensionList == null) { + throw new UnsupportedOperationException("Both old and new dimensions are null") + } else if (oldDimensionList == null || newDimensionList == null) { + throw new UnsupportedOperationException("Either the old or the new dimension is null") + } else if (oldDimensionList.size != newDimensionList.size) { + throw new UnsupportedOperationException( + "Number of children of old and new complex columns are not the same") + } else { + for ((newDimensionInfo, i) <- newDimensionList.zipWithIndex) { + val oldDimensionInfo = oldDimensionList(i) + val old_column_name = oldDimensionInfo.getColName.split('.').last Review comment: use constants for point at all places -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644725868 ########## File path: integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AlterTableColumnRenameTestCase.scala ########## @@ -48,6 +52,298 @@ class AlterTableColumnRenameTestCase extends QueryTest with BeforeAndAfterAll { sql("drop table simple_table") } + test("Rename more than one column at a time in one operation") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str struct<a:struct<b:int, d:int>, c:int>) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a',named_struct('b',12,'d',12), 'c', 12))") + sql("alter table test_rename change str str22 struct<a11:struct<b2:int, d:int>, c:int>") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',24,'d',24), 'c', 24))") + + val rows = sql("select str22.a11.b2 from test_rename").collect() + assert(rows(0).equals(Row(12)) && rows(1).equals(Row(24))) + // check if old column names are still present + val ex1 = intercept[AnalysisException] { + sql("select str from test_rename").show(false) + } + assert(ex1.getMessage.contains("cannot resolve '`str`'")) + + val ex2 = intercept[AnalysisException] { + sql("select str.a from test_rename").show(false) + } + assert(ex2.getMessage.contains("cannot resolve '`str.a`'")) + + // check un-altered columns + val rows1 = sql("select str22.c from test_rename").collect() + val rows2 = sql("select str22.a11.d from test_rename").collect() + assert(rows1.sameElements(Array(Row(12), Row(24)))) + assert(rows2.sameElements(Array(Row(12), Row(24)))) + } + + test("rename complex columns with invalid structure/duplicate-names/Map-type") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (str struct<a:int,b:long>, str2 struct<a:int,b:long>, map1 " + + "map<string, string>, str3 struct<a:int, b:map<string, string>>) STORED AS carbondata") + 
+ val ex1 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:array<int>,b:long>") + } + assert(ex1.getMessage + .contains( + "column rename operation failed: Altering datatypes of any child column is not supported")) + + val ex2 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,b:long,c:int>") + } + assert(ex2.getMessage + .contains( + "column rename operation failed: Number of children of old and new complex columns are " + + "not the same")) + + val ex3 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str int") + } + assert(ex3.getMessage + .contains( + "column rename operation failed: Old and new complex columns are not compatible " + + "in structure")) + + val ex4 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,a:long>") + } + assert(ex4.getMessage + .contains( + "column rename operation failed: Duplicate columns are present")) + + val ex5 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str2 struct<a:int,b:long>") + } + assert(ex5.getMessage + .contains( + "Column Rename Operation failed. 
New column name str2 already exists in table test_rename")) + + val ex6 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change map1 map2 map<string, struct<a:int>>") + } + assert(ex6.getMessage + .contains("rename operation failed: Alter rename is unsupported for Map datatype column")) + + val ex7 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str3 str33 struct<a:int, bc:map<string, string>>") + } + assert(ex7.getMessage + .contains( + "rename operation failed: Cannot alter complex structure that includes map type column")) + + val ex8 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str2 str22 struct<>") + } + assert(ex8.getMessage + .contains( + "rename operation failed: Either the old or the new dimension is null")) + + // ensure all failed rename operations have been reverted to original state + val describe = sql("desc table test_rename") + assert(describe.collect().size == 4) + assertResult(1)(describe.filter( + "col_name='str' and data_type = 'struct<a:int,b:bigint>'").count()) + assertResult(1)(describe.filter( + "col_name='str2' and data_type = 'struct<a:int,b:bigint>'").count()) + assertResult(1)(describe.filter( + "col_name='map1' and data_type = 'map<string,string>'").count()) + assertResult(1)(describe.filter( + "col_name='str3' and data_type = 'struct<a:int,b:map<string,string>>'").count()) + } + + def checkAnswerUtil1(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))))) + checkAnswer(df2, Seq(Row(Row(2)))) + checkAnswer(df3, Seq(Row(2))) + } + + def checkAnswerUtil2(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))), Row(Row(Row(3))))) + checkAnswer(df2, Seq(Row(Row(2)), Row(Row(3)))) + checkAnswer(df3, Seq(Row(2), Row(3))) + } + + test("test alter rename struct of (primitive/struct/array)") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str1 struct<a:int>, str2 
struct<a:struct<b:int>>, str3 " + + "struct<a:struct<b:struct<c:int>>>, intfield int) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a', 2), " + + "named_struct('a', named_struct('b', 2)), named_struct('a', named_struct('b', " + + "named_struct('c', 2))), 1)") + + // Operation 1: rename parent column from str2 to str22 and read old rows + sql("alter table test_rename change str2 str22 struct<a:struct<b:int>>") + var df1 = sql("select str22 from test_rename") + var df2 = sql("select str22.a from test_rename") + var df3 = sql("select str22.a.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // Operation 2: rename child column from a to a11 + sql("alter table test_rename change str22 str22 struct<a11:struct<b:int>>") + df1 = sql("select str22 from test_rename") + df2 = sql("select str22.a11 from test_rename") + df3 = sql("select str22.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // Operation 3: rename parent column from str22 to str33 + sql("alter table test_rename change str22 str33 struct<a11:struct<b:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // insert new rows + sql("insert into test_rename values(named_struct('a', 3), " + + "named_struct('a', named_struct('b', 3)), named_struct('a', named_struct('b', " + + "named_struct('c', 3))), 2)") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + // 
Operation 4: rename child column from a11 to a22 & b to b11 + sql("alter table test_rename change str33 str33 struct<a22:struct<b11:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a22 from test_rename") + df3 = sql("select str33.a22.b11 from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + // Operation 5: rename primitive column from intField to intField2 + sql("alter table test_rename change intField intField2 int") + + val describe = sql("desc table test_rename") + assert(describe.collect().size == 4) + assertResult(1)(describe.filter( + "col_name='str1' and data_type = 'struct<a:int>'").count()) + assertResult(1)(describe.filter( + "col_name='str33' and data_type = 'struct<a22:struct<b11:int>>'").count()) + assertResult(1)(describe.filter( + "col_name='str3' and data_type = 'struct<a:struct<b:struct<c:int>>>'").count()) + + // validate schema evolution entries for 4 above alter operations + val (addedColumns, removedColumns, noOfEvolutions) = returnValuesAfterSchemaEvolution( + "test_rename") + validateSchemaEvolution(addedColumns, removedColumns, noOfEvolutions) + } + + def returnValuesAfterSchemaEvolution(tableName: String): (Seq[ColumnSchema], Seq[ColumnSchema], + Int) = { + val carbonTable = CarbonEnv.getCarbonTable(None, tableName)(sqlContext.sparkSession) + val schemaEvolutionList = carbonTable.getTableInfo + .getFactTable + .getSchemaEvolution() + .getSchemaEvolutionEntryList() + var addedColumns = Seq[ColumnSchema]() + var removedColumns = Seq[ColumnSchema]() + for (i <- 0 until schemaEvolutionList.size()) { + addedColumns ++= + JavaConverters + .asScalaIteratorConverter(schemaEvolutionList.get(i).getAdded.iterator()) + .asScala + .toSeq + + removedColumns ++= + JavaConverters + .asScalaIteratorConverter(schemaEvolutionList.get(i).getRemoved.iterator()) + .asScala + .toSeq + } + (addedColumns, removedColumns, 
schemaEvolutionList.size() - 1) + } + + def validateSchemaEvolution(added: Seq[ColumnSchema], removed: Seq[ColumnSchema], + noOfEvolutions: Int): Unit = { + assert(noOfEvolutions == 5 && added.size == 11 && removed.size == 11) + // asserting only first 6 entries of added and removed columns + assert( + added(0).getColumnName.equals("str22") && removed(0).getColumnName.equals("str2") && + added(1).getColumnName.equals("str22.a") && removed(1).getColumnName.equals("str2.a") && + added(2).getColumnName.equals("str22.a.b") && removed(2).getColumnName.equals("str2.a.b") && + added(3).getColumnName.equals("str22.a11") && removed(3).getColumnName.equals("str22.a") && + added(4).getColumnName.equals("str22.a11.b") && removed(4).getColumnName.equals("str22.a.b")&& + added(5).getColumnName.equals("str33") && removed(5).getColumnName.equals("str22")) + } + + test("test alter rename array of (primitive/array/struct)") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (arr1 array<int>, arr2 array<array<int>>, arr3 array<string>, " + + "arr4 array<struct<a:int>>) STORED AS carbondata") + sql( + "insert into test_rename values (array(1,2,3), array(array(1,2),array(3,4)), array('hello'," + + "'world'), array(named_struct('a',45)))") + + sql("alter table test_rename change arr1 arr11 array<int>") + val df1 = sql("select arr11 from test_rename") + assert(df1.collect.size == 1) + checkAnswer(df1, Seq(Row(make(Array(1, 2, 3))))) + + sql("alter table test_rename change arr2 arr22 array<array<int>>") + val df2 = sql("select arr22 from test_rename") + assert(df2.collect.size == 1) + checkAnswer(df2, Seq(Row(make(Array(make(Array(1, 2)), make(Array(3, 4))))))) + + sql("alter table test_rename change arr3 arr33 array<string>") + val df3 = sql("select arr33 from test_rename") + assert(df3.collect.size == 1) + checkAnswer(sql("select arr33 from test_rename"), Seq(Row(make(Array("hello", "world"))))) + + sql("alter table test_rename change arr4 arr44 
array<struct<a:int>>") + sql("alter table test_rename change arr44 arr44 array<struct<a11:int>>") + + val df4 = sql("select arr44.a11 from test_rename") + assert(df4.collect.size == 1) + checkAnswer(df4, Seq(Row(make(Array(45))))) + + // test for new inserted row + sql( + "insert into test_rename values (array(11,22,33), array(array(11,22),array(33,44)), array" + + "('hello11', 'world11'), array(named_struct('a',4555)))") + val rows = sql("select arr11, arr22, arr33, arr44.a11 from test_rename").collect + assert(rows.size == 2) + val secondRow = rows(1) + assert(secondRow(0).equals(make(Array(11, 22, 33))) && + secondRow(1).equals(make(Array(make(Array(11, 22)), make(Array(33, 44))))) && + secondRow(2).equals(make(Array("hello11", "world11")))) + } + + test("validate alter change datatype for complex children columns") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (str struct<a:int,b:long>) STORED AS carbondata") + + val ex1 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:long,b:long>") + } + assert(ex1.getMessage + .contains( + "column rename operation failed: Altering datatypes of any child column is not supported")) + } + + test("test change comment in case of complex types") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (str struct<a:int> comment 'comment') STORED AS carbondata") + sql("alter table test_rename change str str struct<a:int> comment 'new comment'") + var describe = sql("desc table test_rename") + var count = describe.filter("col_name='str' and comment = 'new comment'").count() + assertResult(1)(count) + + sql("alter table test_rename change str str struct<a1:int> comment 'new comment 2'") + describe = sql("desc table test_rename") + count = describe.filter("col_name='str' and comment = 'new comment 2'").count() + assertResult(1)(count) + } Review comment: please add a test case with compaction and SI -- This is an automated message from 
the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644728532 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -180,33 +200,50 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( } if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { // validate the columns to be renamed - validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable) + validColumnsForRenaming(carbonColumns, carbonTable) } if (isDataTypeChange) { // validate the columns to change datatype - validateColumnDataType(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo, + AlterTableUtil.validateColumnDataType(alterTableColRenameAndDataTypeChangeModel + .dataTypeInfo, oldCarbonColumn.head) } // read the latest schema file val tableInfo: TableInfo = metaStore.getThriftTableInfo(carbonTable) // maintain the added column for schema evolution history - var addColumnSchema: ColumnSchema = null + var addedTableColumnSchema: ColumnSchema = null var deletedColumnSchema: ColumnSchema = null var schemaEvolutionEntry: SchemaEvolutionEntry = null + var addedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] + var deletedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] val columnSchemaList = tableInfo.fact_table.table_columns.asScala.filter(!_.isInvisible) + // to validate duplicate children columns + var uniqueColumnSet: mutable.Set[String] = mutable.Set.empty + /* + * columnSchemaList is a flat structure containing all column schemas including both parent + * and child. + * It is iterated and rename/change-datatype update are made in this list itself. + * Entry is made to the schemaEvolutionEntry for each of the update. 
+ */ columnSchemaList.foreach { columnSchema => - if (columnSchema.column_name.equalsIgnoreCase(oldColumnName)) { - deletedColumnSchema = columnSchema.deepCopy() - if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { - // if only column rename, just get the column schema and rename, make a - // schemaEvolutionEntry + val columnSchemaName = columnSchema.column_name Review comment: ```suggestion val columnName = columnSchema.column_name ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644729305 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -180,33 +200,50 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( } if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { // validate the columns to be renamed - validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable) + validColumnsForRenaming(carbonColumns, carbonTable) } if (isDataTypeChange) { // validate the columns to change datatype - validateColumnDataType(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo, + AlterTableUtil.validateColumnDataType(alterTableColRenameAndDataTypeChangeModel + .dataTypeInfo, oldCarbonColumn.head) } // read the latest schema file val tableInfo: TableInfo = metaStore.getThriftTableInfo(carbonTable) // maintain the added column for schema evolution history - var addColumnSchema: ColumnSchema = null + var addedTableColumnSchema: ColumnSchema = null var deletedColumnSchema: ColumnSchema = null var schemaEvolutionEntry: SchemaEvolutionEntry = null + var addedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] + var deletedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] val columnSchemaList = tableInfo.fact_table.table_columns.asScala.filter(!_.isInvisible) + // to validate duplicate children columns + var uniqueColumnSet: mutable.Set[String] = mutable.Set.empty + /* + * columnSchemaList is a flat structure containing all column schemas including both parent + * and child. + * It is iterated and rename/change-datatype update are made in this list itself. + * Entry is made to the schemaEvolutionEntry for each of the update. 
+ */ columnSchemaList.foreach { columnSchema => - if (columnSchema.column_name.equalsIgnoreCase(oldColumnName)) { - deletedColumnSchema = columnSchema.deepCopy() - if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { - // if only column rename, just get the column schema and rename, make a - // schemaEvolutionEntry + val columnSchemaName = columnSchema.column_name + val isTableColumn = columnSchemaName.equalsIgnoreCase(oldColumnName) Review comment: ```suggestion val columnToAlter = columnSchemaName.equalsIgnoreCase(oldColumnName) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644729305 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -180,33 +200,50 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( } if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { // validate the columns to be renamed - validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable) + validColumnsForRenaming(carbonColumns, carbonTable) } if (isDataTypeChange) { // validate the columns to change datatype - validateColumnDataType(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo, + AlterTableUtil.validateColumnDataType(alterTableColRenameAndDataTypeChangeModel + .dataTypeInfo, oldCarbonColumn.head) } // read the latest schema file val tableInfo: TableInfo = metaStore.getThriftTableInfo(carbonTable) // maintain the added column for schema evolution history - var addColumnSchema: ColumnSchema = null + var addedTableColumnSchema: ColumnSchema = null var deletedColumnSchema: ColumnSchema = null var schemaEvolutionEntry: SchemaEvolutionEntry = null + var addedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] + var deletedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] val columnSchemaList = tableInfo.fact_table.table_columns.asScala.filter(!_.isInvisible) + // to validate duplicate children columns + var uniqueColumnSet: mutable.Set[String] = mutable.Set.empty + /* + * columnSchemaList is a flat structure containing all column schemas including both parent + * and child. + * It is iterated and rename/change-datatype updates are made in this list itself. + * An entry is made to the schemaEvolutionEntry for each of the updates. 
+ */ columnSchemaList.foreach { columnSchema => - if (columnSchema.column_name.equalsIgnoreCase(oldColumnName)) { - deletedColumnSchema = columnSchema.deepCopy() - if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { - // if only column rename, just get the column schema and rename, make a - // schemaEvolutionEntry + val columnSchemaName = columnSchema.column_name + val isTableColumn = columnSchemaName.equalsIgnoreCase(oldColumnName) Review comment: ```suggestion val isTheColumnToAlter = columnSchemaName.equalsIgnoreCase(oldColumnName) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
Free forum by Nabble | Edit this page |