akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644732755 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -180,33 +200,50 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( } if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { // validate the columns to be renamed - validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable) + validColumnsForRenaming(carbonColumns, carbonTable) } if (isDataTypeChange) { // validate the columns to change datatype - validateColumnDataType(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo, + AlterTableUtil.validateColumnDataType(alterTableColRenameAndDataTypeChangeModel + .dataTypeInfo, oldCarbonColumn.head) } // read the latest schema file val tableInfo: TableInfo = metaStore.getThriftTableInfo(carbonTable) // maintain the added column for schema evolution history - var addColumnSchema: ColumnSchema = null + var addedTableColumnSchema: ColumnSchema = null var deletedColumnSchema: ColumnSchema = null var schemaEvolutionEntry: SchemaEvolutionEntry = null + var addedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] + var deletedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] val columnSchemaList = tableInfo.fact_table.table_columns.asScala.filter(!_.isInvisible) + // to validate duplicate children columns + var uniqueColumnSet: mutable.Set[String] = mutable.Set.empty + /* + * columnSchemaList is a flat structure containing all column schemas including both parent + * and child. + * It is iterated and rename/change-datatype update are made in this list itself. + * Entry is made to the schemaEvolutionEntry for each of the update. 
+ */ columnSchemaList.foreach { columnSchema => - if (columnSchema.column_name.equalsIgnoreCase(oldColumnName)) { - deletedColumnSchema = columnSchema.deepCopy() - if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { - // if only column rename, just get the column schema and rename, make a - // schemaEvolutionEntry + val columnSchemaName = columnSchema.column_name + val isTableColumn = columnSchemaName.equalsIgnoreCase(oldColumnName) + var isSchemaEntryRequired = false + deletedColumnSchema = columnSchema.deepCopy() + + if (isTableColumn) { + // isColumnRename will be true if the table-column/parent-column name has been altered, + // just get the columnSchema and rename, and make a schemaEvolutionEntry + if (isColumnRename) { columnSchema.setColumn_name(newColumnName) + isSchemaEntryRequired = true Review comment: you can remove this variable, directly call the `AltertableUtil ` API, one call will be with primitive case or parent column case, one more will be child class, it will be clean instead of playing with boolean variables. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644733607 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -225,12 +264,46 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( newColumnProperties.put(CarbonCommonConstants.COLUMN_COMMENT, newColumnComment.get) columnSchema.setColumnProperties(newColumnProperties) } - addColumnSchema = columnSchema + addedTableColumnSchema = columnSchema + } else if (isComplexChild(columnSchema)) { + if (alteredColumnNamesMap.contains(columnSchemaName)) { + // matches exactly + val newComplexChildName = alteredColumnNamesMap(columnSchemaName) + columnSchema.setColumn_name(newComplexChildName) + isSchemaEntryRequired = true + } else { + val alteredParent = checkIfParentIsAltered(columnSchemaName) + /* + * Lets say, if complex schema is: str struct<a: int> + * and if parent column is changed from str -> str2 + * then its child name should also be changed from str.a -> str2.a + */ + if (alteredParent != null) { + val newParent = alteredColumnNamesMap(alteredParent) + val newComplexChildName = newParent + columnSchemaName + .split(alteredParent)(1) + columnSchema.setColumn_name(newComplexChildName) + isSchemaEntryRequired = true + } + } + } + // validate duplicate child columns + if (uniqueColumnSet.contains(columnSchema.getColumn_name)) { + throw new UnsupportedOperationException("Duplicate columns are present") + } + + // make a new schema evolution entry after column rename or datatype change + if (isSchemaEntryRequired) { + addedColumnsList ++= List(columnSchema) + deletedColumnsList ++= List(deletedColumnSchema) timeStamp = System.currentTimeMillis() - // make a new schema evolution entry after column rename or datatype change - schemaEvolutionEntry = AlterTableUtil - .addNewSchemaEvolutionEntry(timeStamp, addColumnSchema, 
deletedColumnSchema) + schemaEvolutionEntry = AlterTableUtil.addNewSchemaEvolutionEntry(schemaEvolutionEntry, + columnSchema, Review comment: ```suggestion addedTableColumnSchema, ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644734046 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -279,6 +352,24 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( Seq.empty } + private def isComplexChild(columnSchema: ColumnSchema): Boolean = { + columnSchema.column_name.contains(CarbonCommonConstants.POINT) + } + + private def isChildOfColumn(columnSchemaName: String, oldColumnName: String): Boolean = { Review comment: ```suggestion private def isChildOfTheGivenColumn(columnSchemaName: String, oldColumnName: String): Boolean = { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#issuecomment-853822092 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12602/job/ApacheCarbon_PR_Builder_2.4.5/3743/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#issuecomment-853823337 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12602/job/ApacheCarbonPRBuilder2.3/5487/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r637747237 ########## File path: integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AlterTableColumnRenameTestCase.scala ########## @@ -48,6 +52,262 @@ class AlterTableColumnRenameTestCase extends QueryTest with BeforeAndAfterAll { sql("drop table simple_table") } + test("Rename more than one column at a time in one operation") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str struct<a:struct<b:int, d:int>, c:int>) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',12,'d',12), 'c', 12))") + sql("alter table test_rename change str str22 struct<a11:struct<b2:int, d:int>, c:int>") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',24,'d',24), 'c', 24))") + + val rows = sql("select str22.a11.b2 from test_rename").collect() + assert(rows(0).equals(Row(12)) && rows(1).equals(Row(24))) + // check if old column names are still present + val ex1 = intercept[AnalysisException] { + sql("select str from test_rename").show(false) + } + assert(ex1.getMessage + .contains("cannot resolve '`str`' given input columns: [test_rename.str22]")) + + val ex2 = intercept[AnalysisException] { + sql("select str.a from test_rename").show(false) + } + assert(ex2.getMessage + .contains("cannot resolve '`str.a`' given input columns: [test_rename.str22]")) + + // check un-altered columns + val rows1 = sql("select str22.c from test_rename").collect() + val rows2 = sql("select str22.a11.d from test_rename").collect() + assert(rows1.sameElements(Array(Row(12), Row(24)))) + assert(rows2.sameElements(Array(Row(12), Row(24)))) + } + + test("rename complex columns with invalid structure/duplicate names/Map type") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (str struct<a:int,b:long>, str2 struct<a:int,b:long>, map1 " + + 
"map<string, string>, str3 struct<a:int, b:map<string, string>>) STORED AS carbondata") + + val ex1 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:array<int>,b:long>") + } + assert(ex1.getMessage + .contains( + "column rename operation failed: because datatypes of complex children cannot be altered")) + + val ex2 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,b:long,c:int>") + } + assert(ex2.getMessage + .contains( + "column rename operation failed: because number of children of old and new complex " + + "columns are not the same")) + + val ex3 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str int") + } + assert(ex3.getMessage + .contains( + "column rename operation failed: because old and new complex columns are not compatible " + + "in structure")) + + val ex4 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,a:long>") + } + assert(ex4.getMessage + .contains( + "column rename operation failed: because duplicate columns are present")) + + val ex5 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str2 struct<a:int,b:long>") + } + assert(ex5.getMessage + .contains( + "Column Rename Operation failed. 
New column name str2 already exists in table test_rename")) + + val ex6 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change map1 map2 map<string, struct<a:int>>") + } + assert(ex6.getMessage + .contains("rename operation failed: cannot alter map type column")) + + val ex7 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str3 str33 struct<a:int, bc:map<string, string>>") + } + assert(ex7.getMessage + .contains( + "rename operation failed: cannot alter complex structure that includes map type column")) + } + + def checkAnswerUtil1(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))))) + checkAnswer(df2, Seq(Row(Row(2)))) + checkAnswer(df3, Seq(Row(2))) + } + + def checkAnswerUtil2(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))), Row(Row(Row(3))))) + checkAnswer(df2, Seq(Row(Row(2)), Row(Row(3)))) + checkAnswer(df3, Seq(Row(2), Row(3))) + } + + test("test alter rename struct of (primitive/struct/array)") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str1 struct<a:int>, str2 struct<a:struct<b:int>>, str3 " + + "struct<a:struct<b:struct<c:int>>>, intfield int) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a', 2), " + + "named_struct('a', named_struct('b', 2)), named_struct('a', named_struct('b', " + + "named_struct('c', 2))), 1)") + + // rename parent column from str2 to str22 and read old rows + sql("alter table test_rename change str2 str22 struct<a:struct<b:int>>") + var df1 = sql("select str22 from test_rename") + var df2 = sql("select str22.a from test_rename") + var df3 = sql("select str22.a.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // rename child column from a to a11 + sql("alter table test_rename change str22 str22 struct<a11:struct<b:int>>") + df1 = sql("select str22 
from test_rename") + df2 = sql("select str22.a11 from test_rename") + df3 = sql("select str22.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // rename parent column from str22 to str33 + sql("alter table test_rename change str22 str33 struct<a11:struct<b:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // insert new rows + sql("insert into test_rename values(named_struct('a', 3), " + + "named_struct('a', named_struct('b', 3)), named_struct('a', named_struct('b', " + + "named_struct('c', 3))), 2)") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + sql("alter table test_rename change str33 str33 struct<a11:struct<b11:int>>") + sql("alter table test_rename change str33 str33 struct<a22:struct<b11:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a22 from test_rename") + df3 = sql("select str33.a22.b11 from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + val desc = sql("desc table test_rename").collect() + assert(desc(0)(0).equals("str1")) + assert(desc(1)(0).equals("str33")) + assert(desc(1)(1).equals("struct<a22:struct<b11:int>>")) + assert(desc(2)(0).equals("str3")) + } + + test("test alter rename array of (primitive/array/struct)") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (arr1 array<int>, arr2 array<array<int>>, arr3 array<string>, " + + "arr4 
array<struct<a:int>>) STORED AS carbondata") + sql( + "insert into test_rename values (array(1,2,3), array(array(1,2),array(3,4)), array('hello'," + + "'world'), array(named_struct('a',45)))") + + sql("alter table test_rename change arr1 arr11 array<int>") Review comment: remove this sql if not used and check in all other places and remove ########## File path: integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AlterTableColumnRenameTestCase.scala ########## @@ -48,6 +52,262 @@ class AlterTableColumnRenameTestCase extends QueryTest with BeforeAndAfterAll { sql("drop table simple_table") } + test("Rename more than one column at a time in one operation") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str struct<a:struct<b:int, d:int>, c:int>) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',12,'d',12), 'c', 12))") + sql("alter table test_rename change str str22 struct<a11:struct<b2:int, d:int>, c:int>") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',24,'d',24), 'c', 24))") + + val rows = sql("select str22.a11.b2 from test_rename").collect() + assert(rows(0).equals(Row(12)) && rows(1).equals(Row(24))) + // check if old column names are still present + val ex1 = intercept[AnalysisException] { + sql("select str from test_rename").show(false) + } + assert(ex1.getMessage + .contains("cannot resolve '`str`' given input columns: [test_rename.str22]")) + + val ex2 = intercept[AnalysisException] { + sql("select str.a from test_rename").show(false) + } + assert(ex2.getMessage + .contains("cannot resolve '`str.a`' given input columns: [test_rename.str22]")) + + // check un-altered columns + val rows1 = sql("select str22.c from test_rename").collect() + val rows2 = sql("select str22.a11.d from test_rename").collect() + assert(rows1.sameElements(Array(Row(12), Row(24)))) + assert(rows2.sameElements(Array(Row(12), Row(24)))) + } + + 
test("rename complex columns with invalid structure/duplicate names/Map type") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (str struct<a:int,b:long>, str2 struct<a:int,b:long>, map1 " + + "map<string, string>, str3 struct<a:int, b:map<string, string>>) STORED AS carbondata") + + val ex1 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:array<int>,b:long>") + } + assert(ex1.getMessage + .contains( + "column rename operation failed: because datatypes of complex children cannot be altered")) + + val ex2 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,b:long,c:int>") + } + assert(ex2.getMessage + .contains( + "column rename operation failed: because number of children of old and new complex " + + "columns are not the same")) + + val ex3 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str int") + } + assert(ex3.getMessage + .contains( + "column rename operation failed: because old and new complex columns are not compatible " + + "in structure")) + + val ex4 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,a:long>") + } + assert(ex4.getMessage + .contains( + "column rename operation failed: because duplicate columns are present")) + + val ex5 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str2 struct<a:int,b:long>") + } + assert(ex5.getMessage + .contains( + "Column Rename Operation failed. 
New column name str2 already exists in table test_rename")) + + val ex6 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change map1 map2 map<string, struct<a:int>>") + } + assert(ex6.getMessage + .contains("rename operation failed: cannot alter map type column")) + + val ex7 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str3 str33 struct<a:int, bc:map<string, string>>") + } + assert(ex7.getMessage + .contains( + "rename operation failed: cannot alter complex structure that includes map type column")) + } + + def checkAnswerUtil1(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))))) + checkAnswer(df2, Seq(Row(Row(2)))) + checkAnswer(df3, Seq(Row(2))) + } + + def checkAnswerUtil2(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))), Row(Row(Row(3))))) + checkAnswer(df2, Seq(Row(Row(2)), Row(Row(3)))) + checkAnswer(df3, Seq(Row(2), Row(3))) + } + + test("test alter rename struct of (primitive/struct/array)") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str1 struct<a:int>, str2 struct<a:struct<b:int>>, str3 " + + "struct<a:struct<b:struct<c:int>>>, intfield int) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a', 2), " + + "named_struct('a', named_struct('b', 2)), named_struct('a', named_struct('b', " + + "named_struct('c', 2))), 1)") + + // rename parent column from str2 to str22 and read old rows + sql("alter table test_rename change str2 str22 struct<a:struct<b:int>>") + var df1 = sql("select str22 from test_rename") + var df2 = sql("select str22.a from test_rename") + var df3 = sql("select str22.a.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // rename child column from a to a11 + sql("alter table test_rename change str22 str22 struct<a11:struct<b:int>>") + df1 = sql("select str22 
from test_rename") + df2 = sql("select str22.a11 from test_rename") + df3 = sql("select str22.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // rename parent column from str22 to str33 + sql("alter table test_rename change str22 str33 struct<a11:struct<b:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // insert new rows + sql("insert into test_rename values(named_struct('a', 3), " + + "named_struct('a', named_struct('b', 3)), named_struct('a', named_struct('b', " + + "named_struct('c', 3))), 2)") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + sql("alter table test_rename change str33 str33 struct<a11:struct<b11:int>>") + sql("alter table test_rename change str33 str33 struct<a22:struct<b11:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a22 from test_rename") + df3 = sql("select str33.a22.b11 from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + val desc = sql("desc table test_rename").collect() + assert(desc(0)(0).equals("str1")) + assert(desc(1)(0).equals("str33")) + assert(desc(1)(1).equals("struct<a22:struct<b11:int>>")) + assert(desc(2)(0).equals("str3")) + } + + test("test alter rename array of (primitive/array/struct)") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (arr1 array<int>, arr2 array<array<int>>, arr3 array<string>, " + + "arr4 
array<struct<a:int>>) STORED AS carbondata") + sql( + "insert into test_rename values (array(1,2,3), array(array(1,2),array(3,4)), array('hello'," + + "'world'), array(named_struct('a',45)))") + + sql("alter table test_rename change arr1 arr11 array<int>") Review comment: when u just give sql("..."), it will return a dataframe, u are not taking return value, not giving any action on dataframe, so its of no use, u can remove ########## File path: docs/ddl-of-carbondata.md ########## @@ -805,7 +805,7 @@ Users can specify which columns to include and exclude for local dictionary gene 2. If a column to be dropped has any Secondary index table created on them, drop column operation fails and the user will be asked to drop the corresponding SI table first before going for actual drop. - - #### CHANGE COLUMN NAME/TYPE/COMMENT + - #### change-column-nametype Review comment: this name doesn't look right, why changed? if changed also it should be name/type/comment right ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala ########## @@ -1132,10 +1135,53 @@ object CarbonParserUtil { } else if (scale < 0 || scale > 38) { throw new MalformedCarbonCommandException("Invalid value for scale") } - DataTypeInfo("decimal", precision, scale) + DataTypeInfo(columnName, "decimal", precision, scale) + case _ => + DataTypeInfo(columnName, + DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase) + } + } + + /** + * This method will return the instantiated DataTypeInfo by parsing the column + */ + def parseColumn(columnName: String, dataType: DataType, + values: Option[List[(Int, Int)]]): DataTypeInfo = { + // creates parent dataTypeInfo first + val dataTypeName = DataTypeConverterUtil.convertToCarbonType(dataType.typeName).getName + val dataTypeInfo = CarbonParserUtil.parseDataType(columnName, dataTypeName.toLowerCase, values) + // check which child type is present and create children dataTypeInfo accordingly + 
dataType match { + case arrayType: ArrayType => + val childType: DataType = arrayType.elementType + val childName = columnName + ".val" + val childValues = childType match { + case d: DecimalType => Some(List((d.precision, d.scale))) + case _ => None + } + val childDatatypeInfo = parseColumn(childName, childType, childValues) + dataTypeInfo.setChildren(List(childDatatypeInfo)) + case structType: StructType => + var childTypeInfoList: List[DataTypeInfo] = null + for (childField <- structType) { + val childType = childField.dataType + val childName = columnName + CarbonCommonConstants.POINT + childField.name + val childValues = childType match { + case d: DecimalType => Some(List((d.precision, d.scale))) + case _ => None + } + val childDatatypeInfo = CarbonParserUtil.parseColumn(childName, childType, childValues) + if (childTypeInfoList == null) { + childTypeInfoList = List(childDatatypeInfo) + } else { + childTypeInfoList = childTypeInfoList :+ childDatatypeInfo + } + } + dataTypeInfo.setChildren(childTypeInfoList) case _ => - DataTypeInfo(DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase) } + // To-Do for map types Review comment: add jira ID here, todo should be together, else i don't think it identifies as TODO ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -225,12 +264,46 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( newColumnProperties.put(CarbonCommonConstants.COLUMN_COMMENT, newColumnComment.get) columnSchema.setColumnProperties(newColumnProperties) } - addColumnSchema = columnSchema + addedTableColumnSchema = columnSchema + } else if (isComplexChild(columnSchema)) { + if (alteredColumnNamesMap.contains(columnSchemaName)) { + // matches exactly + val newComplexChildName = alteredColumnNamesMap(columnSchemaName) + columnSchema.setColumn_name(newComplexChildName) + isSchemaEntryRequired = 
true + } else { + val alteredParent = checkIfParentIsAltered(columnSchemaName) + /* + * Lets say, if complex schema is: str struct<a: int> + * and if parent column is changed from str -> str2 + * then its child name should also be changed from str.a -> str2.a + */ + if (alteredParent != null) { + val newParent = alteredColumnNamesMap(alteredParent) + val newComplexChildName = newParent + columnSchemaName + .split(alteredParent)(1) + columnSchema.setColumn_name(newComplexChildName) + isSchemaEntryRequired = true + } + } + } + // validate duplicate child columns + if (uniqueColumnSet.contains(columnSchema.getColumn_name)) { + throw new UnsupportedOperationException("Duplicate columns are present") + } + + // make a new schema evolution entry after column rename or datatype change + if (isSchemaEntryRequired) { + addedColumnsList ++= List(columnSchema) + deletedColumnsList ++= List(deletedColumnSchema) timeStamp = System.currentTimeMillis() - // make a new schema evolution entry after column rename or datatype change - schemaEvolutionEntry = AlterTableUtil - .addNewSchemaEvolutionEntry(timeStamp, addColumnSchema, deletedColumnSchema) + schemaEvolutionEntry = AlterTableUtil.addNewSchemaEvolutionEntry(schemaEvolutionEntry, Review comment: i don't see any reason to change this API, i think you have changed API and added null checks in `addNewSchemaEvolutionEntry` since in case of complex there can be multiple columns changed like child and parent. But just sending single column in List and adding null check is not good, here itself make the deleted and added column as List, incase of primitive it will be with single column, in case of complex when both parent and child changes, it will have multiple values. 
Please do the necessary changes ########## File path: integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala ########## @@ -357,18 +358,30 @@ object AlterTableUtil { /** * This method create a new SchemaEvolutionEntry and adds to SchemaEvolutionEntry List * - * @param addColumnSchema added new column schema - * @param deletedColumnSchema old column schema which is deleted + * @param addColumnSchema added new column schema + * @param deletedColumnSchema old column schema which is deleted + * @param addedColumnsList list of added column schemas + * @param deletedColumnsList list of deleted column schemas * @return */ def addNewSchemaEvolutionEntry( - timeStamp: Long, + schemaEvolutionEntry: SchemaEvolutionEntry, addColumnSchema: org.apache.carbondata.format.ColumnSchema, - deletedColumnSchema: org.apache.carbondata.format.ColumnSchema): SchemaEvolutionEntry = { - val schemaEvolutionEntry = new SchemaEvolutionEntry(timeStamp) - schemaEvolutionEntry.setAdded(List(addColumnSchema).asJava) - schemaEvolutionEntry.setRemoved(List(deletedColumnSchema).asJava) - schemaEvolutionEntry + deletedColumnSchema: org.apache.carbondata.format.ColumnSchema, Review comment: please handle this according to comment in command class ########## File path: integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala ########## @@ -1063,4 +1076,106 @@ object AlterTableUtil { } } } + + /** + * This method checks the structure of the old and new complex columns, and- + * 1. throws exception if the number of complex-levels in both columns does not match + * 2. throws exception if the number of children of both columns does not match + * 3. creates alteredColumnNamesMap: new_column_name -> datatype. Here new_column_name are those + * names of the columns that are altered. 
+ * These maps will later be used while altering the table schema + */ + def validateComplexStructure(oldDimensionList: List[CarbonDimension], + newDimensionList: List[DataTypeInfo], + alteredColumnNamesMap: mutable.LinkedHashMap[String, String]): Unit = { + if (oldDimensionList == null && newDimensionList == null) { + throw new UnsupportedOperationException("Both old and new dimensions are null") + } else if (oldDimensionList == null || newDimensionList == null) { + throw new UnsupportedOperationException("Either the old or the new dimension is null") + } else if (oldDimensionList.size != newDimensionList.size) { + throw new UnsupportedOperationException( + "Number of children of old and new complex columns are not the same") + } else { + for ((newDimensionInfo, i) <- newDimensionList.zipWithIndex) { + val oldDimensionInfo = oldDimensionList(i) + val old_column_name = oldDimensionInfo.getColName.split('.').last Review comment: use constants for point at all places ########## File path: integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AlterTableColumnRenameTestCase.scala ########## @@ -48,6 +52,298 @@ class AlterTableColumnRenameTestCase extends QueryTest with BeforeAndAfterAll { sql("drop table simple_table") } + test("Rename more than one column at a time in one operation") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str struct<a:struct<b:int, d:int>, c:int>) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a',named_struct('b',12,'d',12), 'c', 12))") + sql("alter table test_rename change str str22 struct<a11:struct<b2:int, d:int>, c:int>") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',24,'d',24), 'c', 24))") + + val rows = sql("select str22.a11.b2 from test_rename").collect() + assert(rows(0).equals(Row(12)) && rows(1).equals(Row(24))) + // check if old column names are still present + val ex1 = intercept[AnalysisException] { + sql("select 
str from test_rename").show(false) + } + assert(ex1.getMessage.contains("cannot resolve '`str`'")) + + val ex2 = intercept[AnalysisException] { + sql("select str.a from test_rename").show(false) + } + assert(ex2.getMessage.contains("cannot resolve '`str.a`'")) + + // check un-altered columns + val rows1 = sql("select str22.c from test_rename").collect() + val rows2 = sql("select str22.a11.d from test_rename").collect() + assert(rows1.sameElements(Array(Row(12), Row(24)))) + assert(rows2.sameElements(Array(Row(12), Row(24)))) + } + + test("rename complex columns with invalid structure/duplicate-names/Map-type") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (str struct<a:int,b:long>, str2 struct<a:int,b:long>, map1 " + + "map<string, string>, str3 struct<a:int, b:map<string, string>>) STORED AS carbondata") + + val ex1 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:array<int>,b:long>") + } + assert(ex1.getMessage + .contains( + "column rename operation failed: Altering datatypes of any child column is not supported")) + + val ex2 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,b:long,c:int>") + } + assert(ex2.getMessage + .contains( + "column rename operation failed: Number of children of old and new complex columns are " + + "not the same")) + + val ex3 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str int") + } + assert(ex3.getMessage + .contains( + "column rename operation failed: Old and new complex columns are not compatible " + + "in structure")) + + val ex4 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,a:long>") + } + assert(ex4.getMessage + .contains( + "column rename operation failed: Duplicate columns are present")) + + val ex5 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str2 struct<a:int,b:long>") + } 
+ assert(ex5.getMessage + .contains( + "Column Rename Operation failed. New column name str2 already exists in table test_rename")) + + val ex6 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change map1 map2 map<string, struct<a:int>>") + } + assert(ex6.getMessage + .contains("rename operation failed: Alter rename is unsupported for Map datatype column")) + + val ex7 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str3 str33 struct<a:int, bc:map<string, string>>") + } + assert(ex7.getMessage + .contains( + "rename operation failed: Cannot alter complex structure that includes map type column")) + + val ex8 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str2 str22 struct<>") + } + assert(ex8.getMessage + .contains( + "rename operation failed: Either the old or the new dimension is null")) + + // ensure all failed rename operations have been reverted to original state + val describe = sql("desc table test_rename") + assert(describe.collect().size == 4) + assertResult(1)(describe.filter( + "col_name='str' and data_type = 'struct<a:int,b:bigint>'").count()) + assertResult(1)(describe.filter( + "col_name='str2' and data_type = 'struct<a:int,b:bigint>'").count()) + assertResult(1)(describe.filter( + "col_name='map1' and data_type = 'map<string,string>'").count()) + assertResult(1)(describe.filter( + "col_name='str3' and data_type = 'struct<a:int,b:map<string,string>>'").count()) + } + + def checkAnswerUtil1(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))))) + checkAnswer(df2, Seq(Row(Row(2)))) + checkAnswer(df3, Seq(Row(2))) + } + + def checkAnswerUtil2(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))), Row(Row(Row(3))))) + checkAnswer(df2, Seq(Row(Row(2)), Row(Row(3)))) + checkAnswer(df3, Seq(Row(2), Row(3))) + } + + test("test alter rename struct of (primitive/struct/array)") { + sql("drop table if exists 
test_rename") + sql("CREATE TABLE test_rename (str1 struct<a:int>, str2 struct<a:struct<b:int>>, str3 " + + "struct<a:struct<b:struct<c:int>>>, intfield int) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a', 2), " + + "named_struct('a', named_struct('b', 2)), named_struct('a', named_struct('b', " + + "named_struct('c', 2))), 1)") + + // Operation 1: rename parent column from str2 to str22 and read old rows + sql("alter table test_rename change str2 str22 struct<a:struct<b:int>>") + var df1 = sql("select str22 from test_rename") + var df2 = sql("select str22.a from test_rename") + var df3 = sql("select str22.a.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // Operation 2: rename child column from a to a11 + sql("alter table test_rename change str22 str22 struct<a11:struct<b:int>>") + df1 = sql("select str22 from test_rename") + df2 = sql("select str22.a11 from test_rename") + df3 = sql("select str22.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // Operation 3: rename parent column from str22 to str33 + sql("alter table test_rename change str22 str33 struct<a11:struct<b:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // insert new rows + sql("insert into test_rename values(named_struct('a', 3), " + + "named_struct('a', named_struct('b', 3)), named_struct('a', named_struct('b', " + + "named_struct('c', 3))), 2)") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 
2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + // Operation 4: rename child column from a11 to a22 & b to b11 + sql("alter table test_rename change str33 str33 struct<a22:struct<b11:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a22 from test_rename") + df3 = sql("select str33.a22.b11 from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + // Operation 5: rename primitive column from intField to intField2 + sql("alter table test_rename change intField intField2 int") + + val describe = sql("desc table test_rename") + assert(describe.collect().size == 4) + assertResult(1)(describe.filter( + "col_name='str1' and data_type = 'struct<a:int>'").count()) + assertResult(1)(describe.filter( + "col_name='str33' and data_type = 'struct<a22:struct<b11:int>>'").count()) + assertResult(1)(describe.filter( + "col_name='str3' and data_type = 'struct<a:struct<b:struct<c:int>>>'").count()) + + // validate schema evolution entries for 4 above alter operations + val (addedColumns, removedColumns, noOfEvolutions) = returnValuesAfterSchemaEvolution( + "test_rename") + validateSchemaEvolution(addedColumns, removedColumns, noOfEvolutions) + } + + def returnValuesAfterSchemaEvolution(tableName: String): (Seq[ColumnSchema], Seq[ColumnSchema], + Int) = { + val carbonTable = CarbonEnv.getCarbonTable(None, tableName)(sqlContext.sparkSession) + val schemaEvolutionList = carbonTable.getTableInfo + .getFactTable + .getSchemaEvolution() + .getSchemaEvolutionEntryList() + var addedColumns = Seq[ColumnSchema]() + var removedColumns = Seq[ColumnSchema]() + for (i <- 0 until schemaEvolutionList.size()) { + addedColumns ++= + JavaConverters + .asScalaIteratorConverter(schemaEvolutionList.get(i).getAdded.iterator()) + .asScala + .toSeq + + removedColumns ++= + JavaConverters + .asScalaIteratorConverter(schemaEvolutionList.get(i).getRemoved.iterator()) + .asScala 
+ .toSeq + } + (addedColumns, removedColumns, schemaEvolutionList.size() - 1) + } + + def validateSchemaEvolution(added: Seq[ColumnSchema], removed: Seq[ColumnSchema], + noOfEvolutions: Int): Unit = { + assert(noOfEvolutions == 5 && added.size == 11 && removed.size == 11) + // asserting only first 6 entries of added and removed columns + assert( + added(0).getColumnName.equals("str22") && removed(0).getColumnName.equals("str2") && + added(1).getColumnName.equals("str22.a") && removed(1).getColumnName.equals("str2.a") && + added(2).getColumnName.equals("str22.a.b") && removed(2).getColumnName.equals("str2.a.b") && + added(3).getColumnName.equals("str22.a11") && removed(3).getColumnName.equals("str22.a") && + added(4).getColumnName.equals("str22.a11.b") && removed(4).getColumnName.equals("str22.a.b")&& + added(5).getColumnName.equals("str33") && removed(5).getColumnName.equals("str22")) + } + + test("test alter rename array of (primitive/array/struct)") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (arr1 array<int>, arr2 array<array<int>>, arr3 array<string>, " + + "arr4 array<struct<a:int>>) STORED AS carbondata") + sql( + "insert into test_rename values (array(1,2,3), array(array(1,2),array(3,4)), array('hello'," + + "'world'), array(named_struct('a',45)))") + + sql("alter table test_rename change arr1 arr11 array<int>") + val df1 = sql("select arr11 from test_rename") + assert(df1.collect.size == 1) + checkAnswer(df1, Seq(Row(make(Array(1, 2, 3))))) + + sql("alter table test_rename change arr2 arr22 array<array<int>>") + val df2 = sql("select arr22 from test_rename") + assert(df2.collect.size == 1) + checkAnswer(df2, Seq(Row(make(Array(make(Array(1, 2)), make(Array(3, 4))))))) + + sql("alter table test_rename change arr3 arr33 array<string>") + val df3 = sql("select arr33 from test_rename") + assert(df3.collect.size == 1) + checkAnswer(sql("select arr33 from test_rename"), Seq(Row(make(Array("hello", "world"))))) + + sql("alter 
table test_rename change arr4 arr44 array<struct<a:int>>") + sql("alter table test_rename change arr44 arr44 array<struct<a11:int>>") + + val df4 = sql("select arr44.a11 from test_rename") + assert(df4.collect.size == 1) + checkAnswer(df4, Seq(Row(make(Array(45))))) + + // test for new inserted row + sql( + "insert into test_rename values (array(11,22,33), array(array(11,22),array(33,44)), array" + + "('hello11', 'world11'), array(named_struct('a',4555)))") + val rows = sql("select arr11, arr22, arr33, arr44.a11 from test_rename").collect + assert(rows.size == 2) + val secondRow = rows(1) + assert(secondRow(0).equals(make(Array(11, 22, 33))) && + secondRow(1).equals(make(Array(make(Array(11, 22)), make(Array(33, 44))))) && + secondRow(2).equals(make(Array("hello11", "world11")))) + } + + test("validate alter change datatype for complex children columns") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (str struct<a:int,b:long>) STORED AS carbondata") + + val ex1 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:long,b:long>") + } + assert(ex1.getMessage + .contains( + "column rename operation failed: Altering datatypes of any child column is not supported")) + } + + test("test change comment in case of complex types") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (str struct<a:int> comment 'comment') STORED AS carbondata") + sql("alter table test_rename change str str struct<a:int> comment 'new comment'") + var describe = sql("desc table test_rename") + var count = describe.filter("col_name='str' and comment = 'new comment'").count() + assertResult(1)(count) + + sql("alter table test_rename change str str struct<a1:int> comment 'new comment 2'") + describe = sql("desc table test_rename") + count = describe.filter("col_name='str' and comment = 'new comment 2'").count() + assertResult(1)(count) + } Review comment: please add a test case with compaction and SI 
########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -180,33 +200,50 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( } if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { // validate the columns to be renamed - validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable) + validColumnsForRenaming(carbonColumns, carbonTable) } if (isDataTypeChange) { // validate the columns to change datatype - validateColumnDataType(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo, + AlterTableUtil.validateColumnDataType(alterTableColRenameAndDataTypeChangeModel + .dataTypeInfo, oldCarbonColumn.head) } // read the latest schema file val tableInfo: TableInfo = metaStore.getThriftTableInfo(carbonTable) // maintain the added column for schema evolution history - var addColumnSchema: ColumnSchema = null + var addedTableColumnSchema: ColumnSchema = null var deletedColumnSchema: ColumnSchema = null var schemaEvolutionEntry: SchemaEvolutionEntry = null + var addedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] + var deletedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] val columnSchemaList = tableInfo.fact_table.table_columns.asScala.filter(!_.isInvisible) + // to validate duplicate children columns + var uniqueColumnSet: mutable.Set[String] = mutable.Set.empty + /* + * columnSchemaList is a flat structure containing all column schemas including both parent + * and child. + * It is iterated and rename/change-datatype update are made in this list itself. + * Entry is made to the schemaEvolutionEntry for each of the update. 
+ */ columnSchemaList.foreach { columnSchema => - if (columnSchema.column_name.equalsIgnoreCase(oldColumnName)) { - deletedColumnSchema = columnSchema.deepCopy() - if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { - // if only column rename, just get the column schema and rename, make a - // schemaEvolutionEntry + val columnSchemaName = columnSchema.column_name Review comment: ```suggestion val columnName = columnSchema.column_name ``` ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -180,33 +200,50 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( } if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { // validate the columns to be renamed - validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable) + validColumnsForRenaming(carbonColumns, carbonTable) } if (isDataTypeChange) { // validate the columns to change datatype - validateColumnDataType(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo, + AlterTableUtil.validateColumnDataType(alterTableColRenameAndDataTypeChangeModel + .dataTypeInfo, oldCarbonColumn.head) } // read the latest schema file val tableInfo: TableInfo = metaStore.getThriftTableInfo(carbonTable) // maintain the added column for schema evolution history - var addColumnSchema: ColumnSchema = null + var addedTableColumnSchema: ColumnSchema = null var deletedColumnSchema: ColumnSchema = null var schemaEvolutionEntry: SchemaEvolutionEntry = null + var addedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] + var deletedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] val columnSchemaList = tableInfo.fact_table.table_columns.asScala.filter(!_.isInvisible) + // to validate duplicate children columns + var uniqueColumnSet: mutable.Set[String] = mutable.Set.empty + /* + * columnSchemaList is a flat structure containing all column schemas including both 
parent + * and child. + * It is iterated and rename/change-datatype update are made in this list itself. + * Entry is made to the schemaEvolutionEntry for each of the update. + */ columnSchemaList.foreach { columnSchema => - if (columnSchema.column_name.equalsIgnoreCase(oldColumnName)) { - deletedColumnSchema = columnSchema.deepCopy() - if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { - // if only column rename, just get the column schema and rename, make a - // schemaEvolutionEntry + val columnSchemaName = columnSchema.column_name + val isTableColumn = columnSchemaName.equalsIgnoreCase(oldColumnName) Review comment: ```suggestion val columnToAlter = columnSchemaName.equalsIgnoreCase(oldColumnName) ``` ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -180,33 +200,50 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( } if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { // validate the columns to be renamed - validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable) + validColumnsForRenaming(carbonColumns, carbonTable) } if (isDataTypeChange) { // validate the columns to change datatype - validateColumnDataType(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo, + AlterTableUtil.validateColumnDataType(alterTableColRenameAndDataTypeChangeModel + .dataTypeInfo, oldCarbonColumn.head) } // read the latest schema file val tableInfo: TableInfo = metaStore.getThriftTableInfo(carbonTable) // maintain the added column for schema evolution history - var addColumnSchema: ColumnSchema = null + var addedTableColumnSchema: ColumnSchema = null var deletedColumnSchema: ColumnSchema = null var schemaEvolutionEntry: SchemaEvolutionEntry = null + var addedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] + var deletedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] val 
columnSchemaList = tableInfo.fact_table.table_columns.asScala.filter(!_.isInvisible) + // to validate duplicate children columns + var uniqueColumnSet: mutable.Set[String] = mutable.Set.empty + /* + * columnSchemaList is a flat structure containing all column schemas including both parent + * and child. + * It is iterated and rename/change-datatype update are made in this list itself. + * Entry is made to the schemaEvolutionEntry for each of the update. + */ columnSchemaList.foreach { columnSchema => - if (columnSchema.column_name.equalsIgnoreCase(oldColumnName)) { - deletedColumnSchema = columnSchema.deepCopy() - if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { - // if only column rename, just get the column schema and rename, make a - // schemaEvolutionEntry + val columnSchemaName = columnSchema.column_name + val isTableColumn = columnSchemaName.equalsIgnoreCase(oldColumnName) Review comment: ```suggestion val isTheColumnToAlter = columnSchemaName.equalsIgnoreCase(oldColumnName) ``` ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -180,33 +200,50 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( } if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { // validate the columns to be renamed - validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable) + validColumnsForRenaming(carbonColumns, carbonTable) } if (isDataTypeChange) { // validate the columns to change datatype - validateColumnDataType(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo, + AlterTableUtil.validateColumnDataType(alterTableColRenameAndDataTypeChangeModel + .dataTypeInfo, oldCarbonColumn.head) } // read the latest schema file val tableInfo: TableInfo = metaStore.getThriftTableInfo(carbonTable) // maintain the added column for schema evolution history - var addColumnSchema: ColumnSchema = null + var 
addedTableColumnSchema: ColumnSchema = null var deletedColumnSchema: ColumnSchema = null var schemaEvolutionEntry: SchemaEvolutionEntry = null + var addedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] + var deletedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] val columnSchemaList = tableInfo.fact_table.table_columns.asScala.filter(!_.isInvisible) + // to validate duplicate children columns + var uniqueColumnSet: mutable.Set[String] = mutable.Set.empty + /* + * columnSchemaList is a flat structure containing all column schemas including both parent + * and child. + * It is iterated and rename/change-datatype update are made in this list itself. + * Entry is made to the schemaEvolutionEntry for each of the update. + */ columnSchemaList.foreach { columnSchema => - if (columnSchema.column_name.equalsIgnoreCase(oldColumnName)) { - deletedColumnSchema = columnSchema.deepCopy() - if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { - // if only column rename, just get the column schema and rename, make a - // schemaEvolutionEntry + val columnSchemaName = columnSchema.column_name + val isTableColumn = columnSchemaName.equalsIgnoreCase(oldColumnName) + var isSchemaEntryRequired = false + deletedColumnSchema = columnSchema.deepCopy() + + if (isTableColumn) { + // isColumnRename will be true if the table-column/parent-column name has been altered, + // just get the columnSchema and rename, and make a schemaEvolutionEntry + if (isColumnRename) { columnSchema.setColumn_name(newColumnName) + isSchemaEntryRequired = true Review comment: you can remove this variable, directly call the `AltertableUtil ` API, one call will be with primitive case or parent column case, one more will be child class, it will be clean instead of playing with boolean variables. 
########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -225,12 +264,46 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( newColumnProperties.put(CarbonCommonConstants.COLUMN_COMMENT, newColumnComment.get) columnSchema.setColumnProperties(newColumnProperties) } - addColumnSchema = columnSchema + addedTableColumnSchema = columnSchema + } else if (isComplexChild(columnSchema)) { + if (alteredColumnNamesMap.contains(columnSchemaName)) { + // matches exactly + val newComplexChildName = alteredColumnNamesMap(columnSchemaName) + columnSchema.setColumn_name(newComplexChildName) + isSchemaEntryRequired = true + } else { + val alteredParent = checkIfParentIsAltered(columnSchemaName) + /* + * Lets say, if complex schema is: str struct<a: int> + * and if parent column is changed from str -> str2 + * then its child name should also be changed from str.a -> str2.a + */ + if (alteredParent != null) { + val newParent = alteredColumnNamesMap(alteredParent) + val newComplexChildName = newParent + columnSchemaName + .split(alteredParent)(1) + columnSchema.setColumn_name(newComplexChildName) + isSchemaEntryRequired = true + } + } + } + // validate duplicate child columns + if (uniqueColumnSet.contains(columnSchema.getColumn_name)) { + throw new UnsupportedOperationException("Duplicate columns are present") + } + + // make a new schema evolution entry after column rename or datatype change + if (isSchemaEntryRequired) { + addedColumnsList ++= List(columnSchema) + deletedColumnsList ++= List(deletedColumnSchema) timeStamp = System.currentTimeMillis() - // make a new schema evolution entry after column rename or datatype change - schemaEvolutionEntry = AlterTableUtil - .addNewSchemaEvolutionEntry(timeStamp, addColumnSchema, deletedColumnSchema) + schemaEvolutionEntry = AlterTableUtil.addNewSchemaEvolutionEntry(schemaEvolutionEntry, + columnSchema, 
Review comment: ```suggestion addedTableColumnSchema, ``` ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -279,6 +352,24 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( Seq.empty } + private def isComplexChild(columnSchema: ColumnSchema): Boolean = { + columnSchema.column_name.contains(CarbonCommonConstants.POINT) + } + + private def isChildOfColumn(columnSchemaName: String, oldColumnName: String): Boolean = { Review comment: ```suggestion private def isChildOfTheGivenColumn(columnSchemaName: String, oldColumnName: String): Boolean = { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#issuecomment-853770424 retest this please -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
Indhumathi27 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644509268 ########## File path: docs/ddl-of-carbondata.md ########## @@ -819,33 +819,47 @@ Users can specify which columns to include and exclude for local dictionary gene - Invalid scenarios * Change of decimal precision from (10,2) to (10,5) is invalid as in this case only scale is increased but total number of digits remains the same. * Change the comment of the partition column + * Rename operation fails if the structure of the complex column has been altered. Please ensure the old and new columns are compatible with Review comment: Looks like, the links under RENAME COLUMN / CHANGE COLUMN NAME/TYPE/COMMENT is not working. Can you check and fix it ########## File path: docs/ddl-of-carbondata.md ########## @@ -819,33 +819,47 @@ Users can specify which columns to include and exclude for local dictionary gene - Invalid scenarios * Change of decimal precision from (10,2) to (10,5) is invalid as in this case only scale is increased but total number of digits remains the same. * Change the comment of the partition column + * Rename operation fails if the structure of the complex column has been altered. Please ensure the old and new columns are compatible with Review comment: Looks like, the links under RENAME COLUMN / CHANGE COLUMN NAME/TYPE/COMMENT is not navigating to the description. 
Can you check and fix it ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala ########## @@ -1132,10 +1135,60 @@ object CarbonParserUtil { } else if (scale < 0 || scale > 38) { throw new MalformedCarbonCommandException("Invalid value for scale") } - DataTypeInfo("decimal", precision, scale) + DataTypeInfo(columnName, "decimal", precision, scale) + case _ => + DataTypeInfo(columnName, + DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase) + } + } + + /** + * This method will return the instantiated DataTypeInfo by parsing the column + */ + def parseColumn(columnName: String, dataType: DataType, + values: Option[List[(Int, Int)]]): DataTypeInfo = { + // creates parent dataTypeInfo first + val dataTypeInfo = CarbonParserUtil.parseDataType( Review comment: ```suggestion val dataTypeName = DataTypeConverterUtil.convertToCarbonType(dataType.typeName).getName val dataTypeInfo = CarbonParserUtil.parseDataType(columnName, dataTypeName.toLowerCase,values) ``` ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala ########## @@ -1132,10 +1135,60 @@ object CarbonParserUtil { } else if (scale < 0 || scale > 38) { throw new MalformedCarbonCommandException("Invalid value for scale") } - DataTypeInfo("decimal", precision, scale) + DataTypeInfo(columnName, "decimal", precision, scale) + case _ => + DataTypeInfo(columnName, + DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase) + } + } + + /** + * This method will return the instantiated DataTypeInfo by parsing the column + */ + def parseColumn(columnName: String, dataType: DataType, + values: Option[List[(Int, Int)]]): DataTypeInfo = { + // creates parent dataTypeInfo first + val dataTypeInfo = CarbonParserUtil.parseDataType( + columnName, + DataTypeConverterUtil + .convertToCarbonType(dataType.typeName) + .getName + .toLowerCase, + values) + // check which child type is present and 
create children dataTypeInfo accordingly + dataType match { + case arrayType: ArrayType => + val childType: DataType = arrayType.elementType + val childName = columnName + ".val" + val childValues = childType match { + case d: DecimalType => Some(List((d.precision, d.scale))) + case _ => None + } + var childTypeInfoList: List[DataTypeInfo] = null + val childDatatypeInfo = parseColumn(childName, childType, childValues) + childTypeInfoList = List(childDatatypeInfo) Review comment: Could directly set, dataTypeInfo.setChildren( List(childDatatypeInfo)), since childTypeInfoList is not used anywhere else -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#issuecomment-853741821 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r644549681 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -279,6 +404,23 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( Seq.empty } + private def isComplexChild(columnSchema: ColumnSchema): Boolean = { + columnSchema.column_name.contains(CarbonCommonConstants.POINT) + } + + private def isChildOfOldColumn(columnSchemaName: String, oldColumnName: String): Boolean = { + columnSchemaName.startsWith(oldColumnName + CarbonCommonConstants.POINT) + } + + private def checkIfParentIsAltered(columnSchemaName: String): String = { Review comment: if str.a has changed to str.b. the map only stores this change. Its children should also be changed which is not stored in the map, since this is an indirect change ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala ########## @@ -1132,10 +1135,60 @@ object CarbonParserUtil { } else if (scale < 0 || scale > 38) { throw new MalformedCarbonCommandException("Invalid value for scale") } - DataTypeInfo("decimal", precision, scale) + DataTypeInfo(columnName, "decimal", precision, scale) + case _ => + DataTypeInfo(columnName, + DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase) + } + } + + /** + * This method will return the instantiated DataTypeInfo by parsing the column + */ + def parseColumn(columnName: String, dataType: DataType, + values: Option[List[(Int, Int)]]): DataTypeInfo = { + // creates parent dataTypeInfo first + val dataTypeInfo = CarbonParserUtil.parseDataType( Review comment: done ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala ########## @@ -1132,10 +1135,60 @@ object CarbonParserUtil { } else if (scale < 0 || scale > 38) { 
throw new MalformedCarbonCommandException("Invalid value for scale") } - DataTypeInfo("decimal", precision, scale) + DataTypeInfo(columnName, "decimal", precision, scale) + case _ => + DataTypeInfo(columnName, + DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase) + } + } + + /** + * This method will return the instantiated DataTypeInfo by parsing the column + */ + def parseColumn(columnName: String, dataType: DataType, + values: Option[List[(Int, Int)]]): DataTypeInfo = { + // creates parent dataTypeInfo first + val dataTypeInfo = CarbonParserUtil.parseDataType( + columnName, + DataTypeConverterUtil + .convertToCarbonType(dataType.typeName) + .getName + .toLowerCase, + values) + // check which child type is present and create children dataTypeInfo accordingly + dataType match { + case arrayType: ArrayType => + val childType: DataType = arrayType.elementType + val childName = columnName + ".val" + val childValues = childType match { + case d: DecimalType => Some(List((d.precision, d.scale))) + case _ => None + } + var childTypeInfoList: List[DataTypeInfo] = null + val childDatatypeInfo = parseColumn(childName, childType, childValues) + childTypeInfoList = List(childDatatypeInfo) Review comment: done ########## File path: integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AlterTableColumnRenameTestCase.scala ########## @@ -48,6 +52,262 @@ class AlterTableColumnRenameTestCase extends QueryTest with BeforeAndAfterAll { sql("drop table simple_table") } + test("Rename more than one column at a time in one operation") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str struct<a:struct<b:int, d:int>, c:int>) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a11',named_struct('b2',12,'d',12), 'c', 12))") + sql("alter table test_rename change str str22 struct<a11:struct<b2:int, d:int>, c:int>") + sql("insert into test_rename 
values(named_struct('a11',named_struct('b2',24,'d',24), 'c', 24))") + + val rows = sql("select str22.a11.b2 from test_rename").collect() + assert(rows(0).equals(Row(12)) && rows(1).equals(Row(24))) + // check if old column names are still present + val ex1 = intercept[AnalysisException] { + sql("select str from test_rename").show(false) + } + assert(ex1.getMessage + .contains("cannot resolve '`str`' given input columns: [test_rename.str22]")) + + val ex2 = intercept[AnalysisException] { + sql("select str.a from test_rename").show(false) + } + assert(ex2.getMessage + .contains("cannot resolve '`str.a`' given input columns: [test_rename.str22]")) + + // check un-altered columns + val rows1 = sql("select str22.c from test_rename").collect() + val rows2 = sql("select str22.a11.d from test_rename").collect() + assert(rows1.sameElements(Array(Row(12), Row(24)))) + assert(rows2.sameElements(Array(Row(12), Row(24)))) + } + + test("rename complex columns with invalid structure/duplicate names/Map type") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (str struct<a:int,b:long>, str2 struct<a:int,b:long>, map1 " + + "map<string, string>, str3 struct<a:int, b:map<string, string>>) STORED AS carbondata") + + val ex1 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:array<int>,b:long>") + } + assert(ex1.getMessage + .contains( + "column rename operation failed: because datatypes of complex children cannot be altered")) + + val ex2 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,b:long,c:int>") + } + assert(ex2.getMessage + .contains( + "column rename operation failed: because number of children of old and new complex " + + "columns are not the same")) + + val ex3 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str int") + } + assert(ex3.getMessage + .contains( + "column rename operation failed: because old and new 
complex columns are not compatible " + + "in structure")) + + val ex4 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str struct<a:int,a:long>") + } + assert(ex4.getMessage + .contains( + "column rename operation failed: because duplicate columns are present")) + + val ex5 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str str2 struct<a:int,b:long>") + } + assert(ex5.getMessage + .contains( + "Column Rename Operation failed. New column name str2 already exists in table test_rename")) + + val ex6 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change map1 map2 map<string, struct<a:int>>") + } + assert(ex6.getMessage + .contains("rename operation failed: cannot alter map type column")) + + val ex7 = intercept[ProcessMetaDataException] { + sql("alter table test_rename change str3 str33 struct<a:int, bc:map<string, string>>") + } + assert(ex7.getMessage + .contains( + "rename operation failed: cannot alter complex structure that includes map type column")) + } + + def checkAnswerUtil1(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))))) + checkAnswer(df2, Seq(Row(Row(2)))) + checkAnswer(df3, Seq(Row(2))) + } + + def checkAnswerUtil2(df1: DataFrame, df2: DataFrame, df3: DataFrame) { + checkAnswer(df1, Seq(Row(Row(Row(2))), Row(Row(Row(3))))) + checkAnswer(df2, Seq(Row(Row(2)), Row(Row(3)))) + checkAnswer(df3, Seq(Row(2), Row(3))) + } + + test("test alter rename struct of (primitive/struct/array)") { + sql("drop table if exists test_rename") + sql("CREATE TABLE test_rename (str1 struct<a:int>, str2 struct<a:struct<b:int>>, str3 " + + "struct<a:struct<b:struct<c:int>>>, intfield int) STORED AS carbondata") + sql("insert into test_rename values(named_struct('a', 2), " + + "named_struct('a', named_struct('b', 2)), named_struct('a', named_struct('b', " + + "named_struct('c', 2))), 1)") + + // rename parent column from str2 to str22 and read old rows 
+ sql("alter table test_rename change str2 str22 struct<a:struct<b:int>>") + var df1 = sql("select str22 from test_rename") + var df2 = sql("select str22.a from test_rename") + var df3 = sql("select str22.a.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // rename child column from a to a11 + sql("alter table test_rename change str22 str22 struct<a11:struct<b:int>>") + df1 = sql("select str22 from test_rename") + df2 = sql("select str22.a11 from test_rename") + df3 = sql("select str22.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // rename parent column from str22 to str33 + sql("alter table test_rename change str22 str33 struct<a11:struct<b:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 1 && df2.collect().size == 1 && df3.collect().size == 1) + checkAnswerUtil1(df1, df2, df3) + + // insert new rows + sql("insert into test_rename values(named_struct('a', 3), " + + "named_struct('a', named_struct('b', 3)), named_struct('a', named_struct('b', " + + "named_struct('c', 3))), 2)") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a11 from test_rename") + df3 = sql("select str33.a11.b from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + checkAnswerUtil2(df1, df2, df3) + + sql("alter table test_rename change str33 str33 struct<a11:struct<b11:int>>") + sql("alter table test_rename change str33 str33 struct<a22:struct<b11:int>>") + df1 = sql("select str33 from test_rename") + df2 = sql("select str33.a22 from test_rename") + df3 = sql("select str33.a22.b11 from test_rename") + assert(df1.collect().size == 2 && df2.collect().size == 2 && df3.collect().size == 2) + 
checkAnswerUtil2(df1, df2, df3) + + val desc = sql("desc table test_rename").collect() + assert(desc(0)(0).equals("str1")) + assert(desc(1)(0).equals("str33")) + assert(desc(1)(1).equals("struct<a22:struct<b11:int>>")) + assert(desc(2)(0).equals("str3")) + } + + test("test alter rename array of (primitive/array/struct)") { + sql("drop table if exists test_rename") + sql( + "CREATE TABLE test_rename (arr1 array<int>, arr2 array<array<int>>, arr3 array<string>, " + + "arr4 array<struct<a:int>>) STORED AS carbondata") + sql( + "insert into test_rename values (array(1,2,3), array(array(1,2),array(3,4)), array('hello'," + + "'world'), array(named_struct('a',45)))") + + sql("alter table test_rename change arr1 arr11 array<int>") Review comment: parent name is changed. We are using it to query right? ########## File path: docs/ddl-of-carbondata.md ########## @@ -819,33 +819,47 @@ Users can specify which columns to include and exclude for local dictionary gene - Invalid scenarios * Change of decimal precision from (10,2) to (10,5) is invalid as in this case only scale is increased but total number of digits remains the same. * Change the comment of the partition column + * Rename operation fails if the structure of the complex column has been altered. Please ensure the old and new columns are compatible with Review comment: rename and change column name are both the same and they are being navigated to the same description ########## File path: docs/ddl-of-carbondata.md ########## @@ -819,33 +819,47 @@ Users can specify which columns to include and exclude for local dictionary gene - Invalid scenarios * Change of decimal precision from (10,2) to (10,5) is invalid as in this case only scale is increased but total number of digits remains the same. * Change the comment of the partition column + * Rename operation fails if the structure of the complex column has been altered. 
Please ensure the old and new columns are compatible with Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r645487350 ########## File path: docs/ddl-of-carbondata.md ########## @@ -805,7 +805,7 @@ Users can specify which columns to include and exclude for local dictionary gene 2. If a column to be dropped has any Secondary index table created on them, drop column operation fails and the user will be asked to drop the corresponding SI table first before going for actual drop. - - #### CHANGE COLUMN NAME/TYPE/COMMENT + - #### change-column-nametype Review comment: link was not directing to its description properly due to wrong name. This name was not chosen by me but was already there. Changing it to change-column-name-type-comment ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -225,12 +264,46 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( newColumnProperties.put(CarbonCommonConstants.COLUMN_COMMENT, newColumnComment.get) columnSchema.setColumnProperties(newColumnProperties) } - addColumnSchema = columnSchema + addedTableColumnSchema = columnSchema + } else if (isComplexChild(columnSchema)) { + if (alteredColumnNamesMap.contains(columnSchemaName)) { + // matches exactly + val newComplexChildName = alteredColumnNamesMap(columnSchemaName) + columnSchema.setColumn_name(newComplexChildName) + isSchemaEntryRequired = true + } else { + val alteredParent = checkIfParentIsAltered(columnSchemaName) + /* + * Lets say, if complex schema is: str struct<a: int> + * and if parent column is changed from str -> str2 + * then its child name should also be changed from str.a -> str2.a + */ + if (alteredParent != null) { + val newParent = alteredColumnNamesMap(alteredParent) + val newComplexChildName = newParent + columnSchemaName + .split(alteredParent)(1) + columnSchema.setColumn_name(newComplexChildName) + 
isSchemaEntryRequired = true + } + } + } + // validate duplicate child columns + if (uniqueColumnSet.contains(columnSchema.getColumn_name)) { + throw new UnsupportedOperationException("Duplicate columns are present") + } + + // make a new schema evolution entry after column rename or datatype change + if (isSchemaEntryRequired) { + addedColumnsList ++= List(columnSchema) + deletedColumnsList ++= List(deletedColumnSchema) timeStamp = System.currentTimeMillis() - // make a new schema evolution entry after column rename or datatype change - schemaEvolutionEntry = AlterTableUtil - .addNewSchemaEvolutionEntry(timeStamp, addColumnSchema, deletedColumnSchema) + schemaEvolutionEntry = AlterTableUtil.addNewSchemaEvolutionEntry(schemaEvolutionEntry, Review comment: Done. yes I have changed the API to consider altered column names of both children and parent. Sending only added and deleted list now in parameters. And have retained the null check ########## File path: integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala ########## @@ -357,18 +358,30 @@ object AlterTableUtil { /** * This method create a new SchemaEvolutionEntry and adds to SchemaEvolutionEntry List * - * @param addColumnSchema added new column schema - * @param deletedColumnSchema old column schema which is deleted + * @param addColumnSchema added new column schema + * @param deletedColumnSchema old column schema which is deleted + * @param addedColumnsList list of added column schemas + * @param deletedColumnsList list of deleted column schemas * @return */ def addNewSchemaEvolutionEntry( - timeStamp: Long, + schemaEvolutionEntry: SchemaEvolutionEntry, addColumnSchema: org.apache.carbondata.format.ColumnSchema, - deletedColumnSchema: org.apache.carbondata.format.ColumnSchema): SchemaEvolutionEntry = { - val schemaEvolutionEntry = new SchemaEvolutionEntry(timeStamp) - schemaEvolutionEntry.setAdded(List(addColumnSchema).asJava) - 
schemaEvolutionEntry.setRemoved(List(deletedColumnSchema).asJava) - schemaEvolutionEntry + deletedColumnSchema: org.apache.carbondata.format.ColumnSchema, Review comment: done ########## File path: integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala ########## @@ -1063,4 +1076,106 @@ object AlterTableUtil { } } } + + /** + * This method checks the structure of the old and new complex columns, and- + * 1. throws exception if the number of complex-levels in both columns does not match + * 2. throws exception if the number of children of both columns does not match + * 3. creates alteredColumnNamesMap: new_column_name -> datatype. Here new_column_name are those + * names of the columns that are altered. + * These maps will later be used while altering the table schema + */ + def validateComplexStructure(oldDimensionList: List[CarbonDimension], + newDimensionList: List[DataTypeInfo], + alteredColumnNamesMap: mutable.LinkedHashMap[String, String]): Unit = { + if (oldDimensionList == null && newDimensionList == null) { + throw new UnsupportedOperationException("Both old and new dimensions are null") + } else if (oldDimensionList == null || newDimensionList == null) { + throw new UnsupportedOperationException("Either the old or the new dimension is null") + } else if (oldDimensionList.size != newDimensionList.size) { + throw new UnsupportedOperationException( + "Number of children of old and new complex columns are not the same") + } else { + for ((newDimensionInfo, i) <- newDimensionList.zipWithIndex) { + val oldDimensionInfo = oldDimensionList(i) + val old_column_name = oldDimensionInfo.getColName.split('.').last Review comment: Done. There is no constant for character dot. 
So I created one ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -180,33 +200,50 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( } if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { // validate the columns to be renamed - validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable) + validColumnsForRenaming(carbonColumns, carbonTable) } if (isDataTypeChange) { // validate the columns to change datatype - validateColumnDataType(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo, + AlterTableUtil.validateColumnDataType(alterTableColRenameAndDataTypeChangeModel + .dataTypeInfo, oldCarbonColumn.head) } // read the latest schema file val tableInfo: TableInfo = metaStore.getThriftTableInfo(carbonTable) // maintain the added column for schema evolution history - var addColumnSchema: ColumnSchema = null + var addedTableColumnSchema: ColumnSchema = null var deletedColumnSchema: ColumnSchema = null var schemaEvolutionEntry: SchemaEvolutionEntry = null + var addedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] + var deletedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] val columnSchemaList = tableInfo.fact_table.table_columns.asScala.filter(!_.isInvisible) + // to validate duplicate children columns + var uniqueColumnSet: mutable.Set[String] = mutable.Set.empty + /* + * columnSchemaList is a flat structure containing all column schemas including both parent + * and child. + * It is iterated and rename/change-datatype update are made in this list itself. + * Entry is made to the schemaEvolutionEntry for each of the update. 
+ */ columnSchemaList.foreach { columnSchema => - if (columnSchema.column_name.equalsIgnoreCase(oldColumnName)) { - deletedColumnSchema = columnSchema.deepCopy() - if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { - // if only column rename, just get the column schema and rename, make a - // schemaEvolutionEntry + val columnSchemaName = columnSchema.column_name Review comment: done ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -180,33 +200,50 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( } if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { // validate the columns to be renamed - validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable) + validColumnsForRenaming(carbonColumns, carbonTable) } if (isDataTypeChange) { // validate the columns to change datatype - validateColumnDataType(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo, + AlterTableUtil.validateColumnDataType(alterTableColRenameAndDataTypeChangeModel + .dataTypeInfo, oldCarbonColumn.head) } // read the latest schema file val tableInfo: TableInfo = metaStore.getThriftTableInfo(carbonTable) // maintain the added column for schema evolution history - var addColumnSchema: ColumnSchema = null + var addedTableColumnSchema: ColumnSchema = null var deletedColumnSchema: ColumnSchema = null var schemaEvolutionEntry: SchemaEvolutionEntry = null + var addedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] + var deletedColumnsList: List[ColumnSchema] = List.empty[ColumnSchema] val columnSchemaList = tableInfo.fact_table.table_columns.asScala.filter(!_.isInvisible) + // to validate duplicate children columns + var uniqueColumnSet: mutable.Set[String] = mutable.Set.empty + /* + * columnSchemaList is a flat structure containing all column schemas including both parent + * and child. 
+ * It is iterated and rename/change-datatype update are made in this list itself. + * Entry is made to the schemaEvolutionEntry for each of the update. + */ columnSchemaList.foreach { columnSchema => - if (columnSchema.column_name.equalsIgnoreCase(oldColumnName)) { - deletedColumnSchema = columnSchema.deepCopy() - if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { - // if only column rename, just get the column schema and rename, make a - // schemaEvolutionEntry + val columnSchemaName = columnSchema.column_name + val isTableColumn = columnSchemaName.equalsIgnoreCase(oldColumnName) Review comment: changed it to isTableColumnAltered -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r645487697 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -279,6 +352,24 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( Seq.empty } + private def isComplexChild(columnSchema: ColumnSchema): Boolean = { + columnSchema.column_name.contains(CarbonCommonConstants.POINT) + } + + private def isChildOfColumn(columnSchemaName: String, oldColumnName: String): Boolean = { Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#issuecomment-854688244 Build Failed with Spark 2.3.4, Please check CI http://121.244.95.60:12602/job/ApacheCarbonPRBuilder2.3/5502/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#issuecomment-854689978 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12602/job/ApacheCarbon_PR_Builder_2.4.5/3759/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r645573633 ########## File path: core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java ########## @@ -1898,6 +1898,12 @@ private CarbonCommonConstants() { */ public static final String POINT = "."; + /** + * POINT as a character + */ + public static final char CHAR_POINT = '.'; Review comment: please remove this, no need to add as char, use the existing one `POINT` ########## File path: docs/ddl-of-carbondata.md ########## @@ -805,7 +805,7 @@ Users can specify which columns to include and exclude for local dictionary gene 2. If a column to be dropped has any Secondary index table created on them, drop column operation fails and the user will be asked to drop the corresponding SI table first before going for actual drop. - - #### CHANGE COLUMN NAME/TYPE/COMMENT + - #### change-column-name-type-comment Review comment: please make it capital letters like before and please check if link is working and also do not give the - in between words, please refer `SET/UNSET` section and follow the same here ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -34,31 +34,26 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.features.TableOperation import org.apache.carbondata.core.locks.{ICarbonLock, LockUsage} import org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl -import org.apache.carbondata.core.metadata.datatype.DecimalType +import org.apache.carbondata.core.metadata.datatype.{DataTypes, DecimalType} import org.apache.carbondata.core.metadata.schema.table.CarbonTable -import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn +import org.apache.carbondata.core.metadata.schema.table.column.{CarbonColumn, CarbonDimension} 
import org.apache.carbondata.events.{AlterTableColRenameAndDataTypeChangePostEvent, AlterTableColRenameAndDataTypeChangePreEvent, OperationContext, OperationListenerBus} import org.apache.carbondata.format.{ColumnSchema, SchemaEvolutionEntry, TableInfo} import org.apache.carbondata.spark.util.DataTypeConverterUtil abstract class CarbonAlterTableColumnRenameCommand(oldColumnName: String, newColumnName: String) extends MetadataCommand { - protected def validColumnsForRenaming(carbonColumns: mutable.Buffer[CarbonColumn], - oldCarbonColumn: CarbonColumn, + protected def validColumnsForRenaming(columnSchemaList: mutable.Buffer[ColumnSchema], + alteredColumnNamesMap: mutable.LinkedHashMap[String, String], carbonTable: CarbonTable): Unit = { // check whether new column name is already an existing column name - if (carbonColumns.exists(_.getColName.equalsIgnoreCase(newColumnName))) { - throw new MalformedCarbonCommandException(s"Column Rename Operation failed. New " + - s"column name $newColumnName already exists" + - s" in table ${ carbonTable.getTableName }") - } - - // if the column rename is for complex column, block the operation - if (oldCarbonColumn.isComplex) { - throw new MalformedCarbonCommandException(s"Column Rename Operation failed. Rename " + - s"column is unsupported for complex datatype " + - s"column ${ oldCarbonColumn.getColName }") + for ((oldColumnName, newColumnName) <- alteredColumnNamesMap) { + if (columnSchemaList.exists(_.getColumn_name.equalsIgnoreCase(newColumnName))) { + throw new MalformedCarbonCommandException(s"Column Rename Operation failed. New " + + s"column name $newColumnName already exists" + + s" in table ${ carbonTable.getTableName }") + } Review comment: please do not use standard for loop, use in a functional way to check and throw exception. 
########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -69,12 +64,11 @@ abstract class CarbonAlterTableColumnRenameCommand(oldColumnName: String, newCol if (col.getColumnName.equalsIgnoreCase(oldColumnName)) { throw new MalformedCarbonCommandException( s"Column Rename Operation failed. Renaming " + - s"the bucket column $oldColumnName is not " + - s"allowed") + s"the bucket column $oldColumnName is not " + + s"allowed") } } } - Review comment: remove this ########## File path: docs/ddl-of-carbondata.md ########## @@ -866,6 +880,7 @@ Users can specify which columns to include and exclude for local dictionary gene **NOTE:** * Merge index is supported on streaming table from carbondata 2.0.1 version. But streaming segments (ROW_V1) cannot create merge index. + * Rename column name is only supported for complex columns including array and struct. Review comment: ```suggestion * Rename column name is not supported for MAP type. ``` ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -69,12 +64,11 @@ abstract class CarbonAlterTableColumnRenameCommand(oldColumnName: String, newCol if (col.getColumnName.equalsIgnoreCase(oldColumnName)) { throw new MalformedCarbonCommandException( s"Column Rename Operation failed. 
Renaming " + - s"the bucket column $oldColumnName is not " + - s"allowed") + s"the bucket column $oldColumnName is not " + Review comment: revert unnecessary change ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -143,27 +143,49 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( val newColumnPrecision = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.precision val newColumnScale = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.scale // set isDataTypeChange flag - if (oldCarbonColumn.head.getDataType.getName - .equalsIgnoreCase(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.dataType)) { + val oldDatatype = oldCarbonColumn.head.getDataType + val newDatatype = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.dataType + if (isColumnRename && (DataTypes.isMapType(oldDatatype) || + newDatatype.equalsIgnoreCase(CarbonCommonConstants.MAP))) { + throw new UnsupportedOperationException( + "Alter rename is unsupported for Map datatype column") + } + if (oldDatatype.getName.equalsIgnoreCase(newDatatype)) { val newColumnPrecision = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.precision val newColumnScale = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.scale // if the source datatype is decimal and there is change in precision and scale, then // along with rename, datatype change is also required for the command, so set the // isDataTypeChange flag to true in this case - if (oldCarbonColumn.head.getDataType.getName.equalsIgnoreCase("decimal") && - (oldCarbonColumn.head.getDataType.asInstanceOf[DecimalType].getPrecision != + if (DataTypes.isDecimal(oldDatatype) && + (oldDatatype.asInstanceOf[DecimalType].getPrecision != newColumnPrecision || - oldCarbonColumn.head.getDataType.asInstanceOf[DecimalType].getScale != + oldDatatype.asInstanceOf[DecimalType].getScale != newColumnScale)) { 
isDataTypeChange = true } + if (DataTypes.isArrayType(oldDatatype) || DataTypes.isStructType(oldDatatype)) { + val oldParent = oldCarbonColumn.head + val oldChildren = oldParent.asInstanceOf[CarbonDimension].getListOfChildDimensions.asScala + .toList + AlterTableUtil.validateComplexStructure(oldChildren, + alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.getChildren(), + alteredColumnNamesMap) + } } else { + if (oldDatatype.isComplexType || + newDatatype.equalsIgnoreCase(CarbonCommonConstants.ARRAY) || + newDatatype.equalsIgnoreCase(CarbonCommonConstants.STRUCT)) { + throw new UnsupportedOperationException( + "Old and new complex columns are not compatible in structure") Review comment: `oldDatatype.isComplexType` only required, other conditions u can remove ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala ########## @@ -1088,15 +1089,15 @@ object CarbonParserUtil { private def appendParentForEachChild(field: Field, parentName: String): Field = { field.dataType.getOrElse("NIL") match { case "Array" | "Struct" | "Map" => - val newChildren = field.children - .map(_.map(appendParentForEachChild(_, parentName + "." + field.column))) - field.copy(column = parentName + "." + field.column, - name = Some(parentName + "." 
+ field.name.getOrElse(None)), + val newChildren = field.children.map(_.map(appendParentForEachChild(_, + parentName + CarbonCommonConstants.POINT + field.column))) + field.copy(column = parentName + CarbonCommonConstants.POINT + field.column, + name = Some(parentName + CarbonCommonConstants.POINT + field.name.getOrElse(None)), Review comment: please correct the style once, (Ctrl + Shift + Alt + L) ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -143,27 +143,49 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( val newColumnPrecision = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.precision val newColumnScale = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.scale // set isDataTypeChange flag - if (oldCarbonColumn.head.getDataType.getName - .equalsIgnoreCase(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.dataType)) { + val oldDatatype = oldCarbonColumn.head.getDataType + val newDatatype = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.dataType + if (isColumnRename && (DataTypes.isMapType(oldDatatype) || + newDatatype.equalsIgnoreCase(CarbonCommonConstants.MAP))) { + throw new UnsupportedOperationException( + "Alter rename is unsupported for Map datatype column") Review comment: please check only for maptype and throw exception at the beginning only. Always make sure to make validations before making any other operation to avoid unnecessary code executions ########## File path: integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala ########## @@ -1063,4 +1069,108 @@ object AlterTableUtil { } } } + + /** + * This method checks the structure of the old and new complex columns, and- + * 1. throws exception if the number of complex-levels in both columns does not match + * 2. throws exception if the number of children of both columns does not match + * 3. 
creates alteredColumnNamesMap: new_column_name -> datatype. Here new_column_name are those + * names of the columns that are altered. + * These maps will later be used while altering the table schema + */ + def validateComplexStructure(oldDimensionList: List[CarbonDimension], + newDimensionList: List[DataTypeInfo], + alteredColumnNamesMap: mutable.LinkedHashMap[String, String]): Unit = { + if (oldDimensionList == null && newDimensionList == null) { + throw new UnsupportedOperationException("Both old and new dimensions are null") + } else if (oldDimensionList == null || newDimensionList == null) { + throw new UnsupportedOperationException("Either the old or the new dimension is null") + } else if (oldDimensionList.size != newDimensionList.size) { + throw new UnsupportedOperationException( + "Number of children of old and new complex columns are not the same") + } else { + for ((newDimensionInfo, i) <- newDimensionList.zipWithIndex) { + val oldDimensionInfo = oldDimensionList(i) + val old_column_name = oldDimensionInfo.getColName.split(CarbonCommonConstants.CHAR_POINT). + last + val old_column_datatype = oldDimensionInfo.getDataType.getName + val new_column_name = newDimensionInfo.columnName.split(CarbonCommonConstants.CHAR_POINT). + last + val new_column_datatype = newDimensionInfo.dataType + if (!old_column_datatype.equalsIgnoreCase(new_column_datatype)) { + /** + * datatypes of complex children cannot be altered. So throwing exception for now. 
+ * TODO: use alteredColumnDatatypesMap to update the carbon schema + */ + throw new UnsupportedOperationException( Review comment: please use `//` for comment ########## File path: integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala ########## @@ -357,18 +358,23 @@ object AlterTableUtil { /** * This method create a new SchemaEvolutionEntry and adds to SchemaEvolutionEntry List * - * @param addColumnSchema added new column schema - * @param deletedColumnSchema old column schema which is deleted + * @param addedColumnsList list of added column schemas + * @param deletedColumnsList list of deleted column schemas * @return */ def addNewSchemaEvolutionEntry( - timeStamp: Long, - addColumnSchema: org.apache.carbondata.format.ColumnSchema, - deletedColumnSchema: org.apache.carbondata.format.ColumnSchema): SchemaEvolutionEntry = { - val schemaEvolutionEntry = new SchemaEvolutionEntry(timeStamp) - schemaEvolutionEntry.setAdded(List(addColumnSchema).asJava) - schemaEvolutionEntry.setRemoved(List(deletedColumnSchema).asJava) - schemaEvolutionEntry + schemaEvolutionEntry: SchemaEvolutionEntry, + addedColumnsList: List[org.apache.carbondata.format.ColumnSchema], + deletedColumnsList: List[org.apache.carbondata.format.ColumnSchema]): SchemaEvolutionEntry = { + val timeStamp = System.currentTimeMillis() + var newSchemaEvolutionEntry = schemaEvolutionEntry; + if (newSchemaEvolutionEntry == null) { + newSchemaEvolutionEntry = new SchemaEvolutionEntry(timeStamp) + } + newSchemaEvolutionEntry.setTime_stamp(timeStamp) Review comment: dont change the timestamp, add to entry only once and change the if condition at line 371, don't override the variable ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -178,35 +200,51 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( } } } - if 
(alterTableColRenameAndDataTypeChangeModel.isColumnRename) { + + // read the latest schema file + val tableInfo: TableInfo = metaStore.getThriftTableInfo(carbonTable) + val columnSchemaList = tableInfo.fact_table.table_columns.asScala.filter(!_.isInvisible) + if (!alteredColumnNamesMap.isEmpty) { // validate the columns to be renamed - validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable) + validColumnsForRenaming(columnSchemaList, alteredColumnNamesMap, carbonTable) Review comment: do not use thrift object here, please get the list of columns from carbon table object -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r645697097 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -143,27 +143,49 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( val newColumnPrecision = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.precision val newColumnScale = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.scale // set isDataTypeChange flag - if (oldCarbonColumn.head.getDataType.getName - .equalsIgnoreCase(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.dataType)) { + val oldDatatype = oldCarbonColumn.head.getDataType + val newDatatype = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.dataType + if (isColumnRename && (DataTypes.isMapType(oldDatatype) || + newDatatype.equalsIgnoreCase(CarbonCommonConstants.MAP))) { + throw new UnsupportedOperationException( + "Alter rename is unsupported for Map datatype column") + } + if (oldDatatype.getName.equalsIgnoreCase(newDatatype)) { val newColumnPrecision = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.precision val newColumnScale = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.scale // if the source datatype is decimal and there is change in precision and scale, then // along with rename, datatype change is also required for the command, so set the // isDataTypeChange flag to true in this case - if (oldCarbonColumn.head.getDataType.getName.equalsIgnoreCase("decimal") && - (oldCarbonColumn.head.getDataType.asInstanceOf[DecimalType].getPrecision != + if (DataTypes.isDecimal(oldDatatype) && + (oldDatatype.asInstanceOf[DecimalType].getPrecision != newColumnPrecision || - oldCarbonColumn.head.getDataType.asInstanceOf[DecimalType].getScale != + oldDatatype.asInstanceOf[DecimalType].getScale != newColumnScale)) { isDataTypeChange = true } + if 
(DataTypes.isArrayType(oldDatatype) || DataTypes.isStructType(oldDatatype)) { + val oldParent = oldCarbonColumn.head + val oldChildren = oldParent.asInstanceOf[CarbonDimension].getListOfChildDimensions.asScala + .toList + AlterTableUtil.validateComplexStructure(oldChildren, + alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.getChildren(), + alteredColumnNamesMap) + } } else { + if (oldDatatype.isComplexType || + newDatatype.equalsIgnoreCase(CarbonCommonConstants.ARRAY) || + newDatatype.equalsIgnoreCase(CarbonCommonConstants.STRUCT)) { + throw new UnsupportedOperationException( + "Old and new complex columns are not compatible in structure") Review comment: done ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -143,27 +143,49 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( val newColumnPrecision = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.precision val newColumnScale = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.scale // set isDataTypeChange flag - if (oldCarbonColumn.head.getDataType.getName - .equalsIgnoreCase(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.dataType)) { + val oldDatatype = oldCarbonColumn.head.getDataType + val newDatatype = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.dataType + if (isColumnRename && (DataTypes.isMapType(oldDatatype) || + newDatatype.equalsIgnoreCase(CarbonCommonConstants.MAP))) { + throw new UnsupportedOperationException( + "Alter rename is unsupported for Map datatype column") Review comment: check should be on isColumnRename too because, comments can be changed in case of maps. And it is dependent on oldCarbonColumn, which is instantiated right above it. So I think exception is thrown at the right place. You can suggest a place if required. 
########## File path: integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala ########## @@ -357,18 +358,23 @@ object AlterTableUtil { /** * This method create a new SchemaEvolutionEntry and adds to SchemaEvolutionEntry List * - * @param addColumnSchema added new column schema - * @param deletedColumnSchema old column schema which is deleted + * @param addedColumnsList list of added column schemas + * @param deletedColumnsList list of deleted column schemas * @return */ def addNewSchemaEvolutionEntry( - timeStamp: Long, - addColumnSchema: org.apache.carbondata.format.ColumnSchema, - deletedColumnSchema: org.apache.carbondata.format.ColumnSchema): SchemaEvolutionEntry = { - val schemaEvolutionEntry = new SchemaEvolutionEntry(timeStamp) - schemaEvolutionEntry.setAdded(List(addColumnSchema).asJava) - schemaEvolutionEntry.setRemoved(List(deletedColumnSchema).asJava) - schemaEvolutionEntry + schemaEvolutionEntry: SchemaEvolutionEntry, + addedColumnsList: List[org.apache.carbondata.format.ColumnSchema], + deletedColumnsList: List[org.apache.carbondata.format.ColumnSchema]): SchemaEvolutionEntry = { + val timeStamp = System.currentTimeMillis() + var newSchemaEvolutionEntry = schemaEvolutionEntry; + if (newSchemaEvolutionEntry == null) { + newSchemaEvolutionEntry = new SchemaEvolutionEntry(timeStamp) + } + newSchemaEvolutionEntry.setTime_stamp(timeStamp) Review comment: done ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/schema/CarbonAlterTableColRenameDataTypeChangeCommand.scala ########## @@ -178,35 +200,51 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand( } } } - if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) { + + // read the latest schema file + val tableInfo: TableInfo = metaStore.getThriftTableInfo(carbonTable) + val columnSchemaList = tableInfo.fact_table.table_columns.asScala.filter(!_.isInvisible) + if (!alteredColumnNamesMap.isEmpty) { // validate the 
columns to be renamed - validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable) + validColumnsForRenaming(columnSchemaList, alteredColumnNamesMap, carbonTable) Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r645731693 ########## File path: docs/ddl-of-carbondata.md ########## @@ -805,7 +805,7 @@ Users can specify which columns to include and exclude for local dictionary gene 2. If a column to be dropped has any Secondary index table created on them, drop column operation fails and the user will be asked to drop the corresponding SI table first before going for actual drop. - - #### CHANGE COLUMN NAME/TYPE/COMMENT + - #### change-column-name-type-comment Review comment: Done ########## File path: core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java ########## @@ -1898,6 +1898,12 @@ private CarbonCommonConstants() { */ public static final String POINT = "."; + /** + * POINT as a character + */ + public static final char CHAR_POINT = '.'; Review comment: used it in split method. Split in scala uses regex and not string. I now used POINT only and converted to character -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akkio-97 commented on a change in pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#discussion_r645731717 ########## File path: core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java ########## @@ -1898,6 +1898,12 @@ private CarbonCommonConstants() { */ public static final String POINT = "."; + /** + * POINT as a character + */ + public static final char CHAR_POINT = '.'; Review comment: Used it in the split method. Split in Scala takes a regex, not a string. I have now used the string POINT only and converted it to a character. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#issuecomment-854941481 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12602/job/ApacheCarbon_PR_Builder_2.4.5/3765/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4129: URL: https://github.com/apache/carbondata/pull/4129#issuecomment-854942269 Build Failed with Spark 2.3.4, Please check CI http://121.244.95.60:12602/job/ApacheCarbonPRBuilder2.3/5508/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
Free forum by Nabble | Edit this page |