Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1583 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/1726/ --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r154983469 --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/events/RefreshTableEvents.scala --- @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.events + +import org.apache.spark.sql._ + +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier + +/** + * Class for handling operations before start of a load process. --- End diff -- There is already an event for the load process. --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r154983865 --- Diff: integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala --- @@ -1044,48 +1045,48 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser { dataType match { case "string" => Field(field.column, Some("String"), field.name, Some(null), field.parent, - field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema - ) + field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema, + field.columnComment) case "smallint" => Field(field.column, Some("SmallInt"), field.name, Some(null), field.parent, field.storeType, field.schemaOrdinal, - field.precision, field.scale, field.rawSchema) + field.precision, field.scale, field.rawSchema, field.columnComment) case "integer" | "int" => Field(field.column, Some("Integer"), field.name, Some(null), field.parent, field.storeType, field.schemaOrdinal, - field.precision, field.scale, field.rawSchema) + field.precision, field.scale, field.rawSchema, field.columnComment) case "long" => Field(field.column, Some("Long"), field.name, Some(null), field.parent, - field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema - ) + field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema, + field.columnComment) case "double" => Field(field.column, Some("Double"), field.name, Some(null), field.parent, - field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema - ) + field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema, + field.columnComment) --- End diff -- Is this change for this PR? If not for this PR, please raise another PR for it --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r154984011 --- Diff: integration/spark-common/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala --- @@ -47,6 +47,7 @@ import org.apache.carbondata.spark.util.DataTypeConverterUtil case class TableModel( ifNotExistsSet: Boolean, + var databaseName: String, --- End diff -- There is already a databaseNameOp in line 51 --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r154984440 --- Diff: integration/spark-common/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala --- @@ -320,13 +322,8 @@ class AlterTableColumnSchemaGenerator( // TODO: move this to carbon store API object TableNewProcessor { def apply( - cm: TableModel, - identifier: AbsoluteTableIdentifier): TableInfo = { - new TableNewProcessor( - cm, - identifier.getDatabaseName, - identifier.getTableName, - identifier.getTablePath).process + cm: TableModel): TableInfo = { --- End diff -- Do not change this, otherwise tablePath will be lost --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r154984765 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala --- @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.management + +import java.util + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql._ +import org.apache.spark.sql.execution.command.MetadataCommand +import org.apache.spark.sql.execution.command.table.CarbonCreateTableCommand +import org.apache.spark.sql.util.CarbonException + +import org.apache.carbondata.common.logging.{LogService, LogServiceFactory} +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.metadata.schema.table.{DataMapSchema, TableInfo} +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema +import org.apache.carbondata.core.util.CarbonProperties +import org.apache.carbondata.core.util.path.CarbonStorePath +import org.apache.carbondata.events.{OperationContext, OperationListenerBus, RefreshTablePostExecutionEvent, RefreshTablePreExecutionEvent} + +/** + * Command to register carbon table from existing carbon table data + */ +case class RefreshCarbonTableCommand( + dbName: Option[String], + tableName: String) + extends MetadataCommand { + val LOGGER: LogService = + LogServiceFactory.getLogService(this.getClass.getName) + + override def processMetadata(sparkSession: SparkSession): Seq[Row] = { + val metaStore = CarbonEnv.getInstance(sparkSession).carbonMetastore + val databaseName = CarbonEnv.getDatabaseName(dbName)(sparkSession) + val databaseLocation = CarbonEnv.getDatabaseLocation(databaseName, sparkSession) + // Steps + // 1. get table path + // 2. 
perform the below steps + // 2.1 check if the table already register with hive then ignore and continue with the next + // schema + // 2.2 register the table with the hive check if the table being registered has aggregate table + // then do the below steps + // 2.2.1 validate that all the aggregate tables are copied at the store location. + // 2.2.2 Register the aggregate tables + val tablePath = databaseLocation + CarbonCommonConstants.FILE_SEPARATOR + tableName --- End diff -- use `CarbonEnv.getTablePath` to get the path --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r154986226 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala --- @@ -29,25 +29,34 @@ import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.exception.InvalidConfigurationException import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier -import org.apache.carbondata.core.metadata.schema.table.TableInfo +import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, TableInfo} import org.apache.carbondata.core.util.CarbonUtil import org.apache.carbondata.events.{CreateTablePostExecutionEvent, CreateTablePreExecutionEvent, OperationContext, OperationListenerBus} +import org.apache.carbondata.spark.util.CarbonSparkUtil case class CarbonCreateTableCommand( - cm: TableModel, + tableInfo: TableInfo, --- End diff -- I do not think you can create the TableInfo in the parser, since it does not have the sparkSession, so the dbName may be wrong. You need to use `CarbonEnv.getDatabaseName` to get the dbName; it requires the sparkSession. --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r154986380 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala --- @@ -81,15 +90,13 @@ case class CarbonCreateTableCommand( val carbonSchemaString = catalog.generateTableSchemaString(tableInfo, tableIdentifier) if (createDSTable) { try { - val fields = new Array[Field](cm.dimCols.size + cm.msrCols.size) - cm.dimCols.foreach(f => fields(f.schemaOrdinal) = f) - cm.msrCols.foreach(f => fields(f.schemaOrdinal) = f) - - sparkSession.sparkContext.setLocalProperty(EXECUTION_ID_KEY, null) val tablePath = tableIdentifier.getTablePath + val carbonRelation = CarbonSparkUtil.createCarbonRelation(tableInfo, tablePath) + val rawSchema = CarbonSparkUtil.getRawSchema(carbonRelation) --- End diff -- Why is this needed? --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r154987030 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonMetaStore.scala --- @@ -142,6 +142,10 @@ trait CarbonMetaStore { def getTableFromMetadataCache(database: String, tableName: String): Option[CarbonTable] + def getTableInfo(absoluteTableIdentifier: AbsoluteTableIdentifier) --- End diff -- Why is this required? It is better not to add more interfaces. --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on the issue:
https://github.com/apache/carbondata/pull/1583 It seems there are a lot of changes related to column comments; please separate them into another PR if they are not related to this PR. --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1583 Build Success with Spark 2.2.0, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/480/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1583 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/2108/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1583 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/1732/ --- |
In reply to this post by qiuchenjian-2
Github user mohammadshahidkhan commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r155038052 --- Diff: integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala --- @@ -1044,48 +1045,48 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser { dataType match { case "string" => Field(field.column, Some("String"), field.name, Some(null), field.parent, - field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema - ) + field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema, + field.columnComment) case "smallint" => Field(field.column, Some("SmallInt"), field.name, Some(null), field.parent, field.storeType, field.schemaOrdinal, - field.precision, field.scale, field.rawSchema) + field.precision, field.scale, field.rawSchema, field.columnComment) case "integer" | "int" => Field(field.column, Some("Integer"), field.name, Some(null), field.parent, field.storeType, field.schemaOrdinal, - field.precision, field.scale, field.rawSchema) + field.precision, field.scale, field.rawSchema, field.columnComment) case "long" => Field(field.column, Some("Long"), field.name, Some(null), field.parent, - field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema - ) + field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema, + field.columnComment) case "double" => Field(field.column, Some("Double"), field.name, Some(null), field.parent, - field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema - ) + field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema, + field.columnComment) --- End diff -- this is needed as part of this pr only --- |
In reply to this post by qiuchenjian-2
Github user mohammadshahidkhan commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r155041379 --- Diff: integration/spark-common/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala --- @@ -320,13 +322,8 @@ class AlterTableColumnSchemaGenerator( // TODO: move this to carbon store API object TableNewProcessor { def apply( - cm: TableModel, - identifier: AbsoluteTableIdentifier): TableInfo = { - new TableNewProcessor( - cm, - identifier.getDatabaseName, - identifier.getTableName, - identifier.getTablePath).process + cm: TableModel): TableInfo = { --- End diff -- We have to pass the TableInfo to the CarbonCreateTableCommand so that we can use the CarbonCreateTableCommand flow from the RefreshCarbonTableCommand. Anyway, for supporting the external table path, the tablePath is being passed to CarbonCreateTableCommand. --- |
In reply to this post by qiuchenjian-2
Github user mohammadshahidkhan commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r155043480 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala --- @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.management + +import java.util + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql._ +import org.apache.spark.sql.execution.command.MetadataCommand +import org.apache.spark.sql.execution.command.table.CarbonCreateTableCommand +import org.apache.spark.sql.util.CarbonException + +import org.apache.carbondata.common.logging.{LogService, LogServiceFactory} +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.metadata.schema.table.{DataMapSchema, TableInfo} +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema +import org.apache.carbondata.core.util.CarbonProperties +import org.apache.carbondata.core.util.path.CarbonStorePath +import org.apache.carbondata.events.{OperationContext, OperationListenerBus, RefreshTablePostExecutionEvent, RefreshTablePreExecutionEvent} + +/** + * Command to register carbon table from existing carbon table data + */ +case class RefreshCarbonTableCommand( + dbName: Option[String], + tableName: String) + extends MetadataCommand { + val LOGGER: LogService = + LogServiceFactory.getLogService(this.getClass.getName) + + override def processMetadata(sparkSession: SparkSession): Seq[Row] = { + val metaStore = CarbonEnv.getInstance(sparkSession).carbonMetastore + val databaseName = CarbonEnv.getDatabaseName(dbName)(sparkSession) + val databaseLocation = CarbonEnv.getDatabaseLocation(databaseName, sparkSession) + // Steps + // 1. get table path + // 2. 
perform the below steps + // 2.1 check if the table already register with hive then ignore and continue with the next + // schema + // 2.2 register the table with the hive check if the table being registered has aggregate table + // then do the below steps + // 2.2.1 validate that all the aggregate tables are copied at the store location. + // 2.2.2 Register the aggregate tables + val tablePath = databaseLocation + CarbonCommonConstants.FILE_SEPARATOR + tableName --- End diff -- fixed --- |
In reply to this post by qiuchenjian-2
Github user mohammadshahidkhan commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r155044945 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala --- @@ -29,25 +29,34 @@ import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.exception.InvalidConfigurationException import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier -import org.apache.carbondata.core.metadata.schema.table.TableInfo +import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, TableInfo} import org.apache.carbondata.core.util.CarbonUtil import org.apache.carbondata.events.{CreateTablePostExecutionEvent, CreateTablePreExecutionEvent, OperationContext, OperationListenerBus} +import org.apache.carbondata.spark.util.CarbonSparkUtil case class CarbonCreateTableCommand( - cm: TableModel, + tableInfo: TableInfo, --- End diff -- correct if user does not pass the database name while creating table, then database name will not there that is why added the following code in CarbonCreateTableCommand.processMetadata var databaseOpt : Option[String] = None if(tableInfo.getDatabaseName != null) { databaseOpt = Some(tableInfo.getDatabaseName) } val dbName = CarbonEnv.getDatabaseName(databaseOpt)(sparkSession) --- |
In reply to this post by qiuchenjian-2
Github user mohammadshahidkhan commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r155045624 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala --- @@ -81,15 +90,13 @@ case class CarbonCreateTableCommand( val carbonSchemaString = catalog.generateTableSchemaString(tableInfo, tableIdentifier) if (createDSTable) { try { - val fields = new Array[Field](cm.dimCols.size + cm.msrCols.size) - cm.dimCols.foreach(f => fields(f.schemaOrdinal) = f) - cm.msrCols.foreach(f => fields(f.schemaOrdinal) = f) - - sparkSession.sparkContext.setLocalProperty(EXECUTION_ID_KEY, null) val tablePath = tableIdentifier.getTablePath + val carbonRelation = CarbonSparkUtil.createCarbonRelation(tableInfo, tablePath) + val rawSchema = CarbonSparkUtil.getRawSchema(carbonRelation) --- End diff -- While registering the table with hive we need rawSchema, earlier this we were getting from the column Field --- |
In reply to this post by qiuchenjian-2
Github user mohammadshahidkhan commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r155046109 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonMetaStore.scala --- @@ -142,6 +142,10 @@ trait CarbonMetaStore { def getTableFromMetadataCache(database: String, tableName: String): Option[CarbonTable] + def getTableInfo(absoluteTableIdentifier: AbsoluteTableIdentifier) --- End diff -- In many places we are reading the schema from the tablePath, i.e., we decided to move it to a common place. Once this PR is merged, we will refactor the code in other places to use this interface. --- |
In reply to this post by qiuchenjian-2
Github user mohammadshahidkhan commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1583#discussion_r155051361 --- Diff: integration/spark-common/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala --- @@ -47,6 +47,7 @@ import org.apache.carbondata.spark.util.DataTypeConverterUtil case class TableModel( ifNotExistsSet: Boolean, + var databaseName: String, --- End diff -- fixed --- |
Free forum by Nabble | Edit this page |