CarbonDataQA1 commented on pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#issuecomment-704139470 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2562/ ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
ajantha-bhat commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r507464156 ########## File path: integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonOutputFormat.java ########## @@ -92,6 +95,14 @@ public void checkOutputSpecs(FileSystem fileSystem, JobConf jobConf) throws IOEx } String tablePath = FileFactory.getCarbonFile(carbonLoadModel.getTablePath()).getAbsolutePath(); TaskAttemptID taskAttemptID = TaskAttemptID.forName(jc.get("mapred.task.id")); + // taskAttemptID will be null when the insert job is fired from presto. Presto send the JobConf + // and since presto does not use the MR framework for execution, the mapred.task.id will be + // null, so prepare a new ID. + if (taskAttemptID == null) { + SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmm"); + String jobTrackerId = formatter.format(new Date()); + taskAttemptID = new TaskAttemptID(jobTrackerId, 0, TaskType.MAP, 0, 0); Review comment: Concurrent insert may use same taskAttemptID. Can you use a UUID as taskAttemptID or check how ORC writer is doing? ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataFileWriter.java ########## @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.presto; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.hadoop.api.CarbonTableOutputFormat; +import org.apache.carbondata.hive.CarbonHiveSerDe; +import org.apache.carbondata.hive.MapredCarbonOutputFormat; +import org.apache.carbondata.presto.impl.CarbonTableConfig; + +import com.google.common.collect.ImmutableList; +import io.prestosql.plugin.hive.HiveFileWriter; +import io.prestosql.plugin.hive.HiveType; +import io.prestosql.plugin.hive.HiveWriteUtils; +import io.prestosql.spi.Page; +import io.prestosql.spi.PrestoException; +import io.prestosql.spi.block.Block; +import io.prestosql.spi.type.Type; +import io.prestosql.spi.type.TypeManager; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Reporter; +import org.apache.log4j.Logger; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_WRITER_DATA_ERROR; +import static java.util.Objects.requireNonNull; +import 
static java.util.stream.Collectors.toList; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.COMPRESSRESULT; + +/** + * This class implements HiveFileWriter and it creates the carbonFileWriter to write the page data + * sent from presto. + */ +public class CarbonDataFileWriter implements HiveFileWriter { + + private static final Logger LOG = + LogServiceFactory.getLogService(CarbonDataFileWriter.class.getName()); + + private final JobConf configuration; + private final Path outPutPath; + private final FileSinkOperator.RecordWriter recordWriter; + private final CarbonHiveSerDe serDe; + private final int fieldCount; + private final Object row; + private final SettableStructObjectInspector tableInspector; + private final List<StructField> structFields; + private final HiveWriteUtils.FieldSetter[] setters; + + private boolean isCommitDone; + + public CarbonDataFileWriter(Path outPutPath, List<String> inputColumnNames, Properties properties, + JobConf configuration, TypeManager typeManager) throws SerDeException { + requireNonNull(outPutPath, "path is null"); + // take the outputPath same as location in compliance with the carbon store folder structure. 
+ this.outPutPath = new Path(properties.getProperty("location")); + this.configuration = requireNonNull(configuration, "conf is null"); + List<String> columnNames = Arrays + .asList(properties.getProperty(IOConstants.COLUMNS, "").split(CarbonCommonConstants.COMMA)); + List<Type> fileColumnTypes = + HiveType.toHiveTypes(properties.getProperty(IOConstants.COLUMNS_TYPES, "")).stream() + .map(hiveType -> hiveType.getType(typeManager)).collect(toList()); + this.fieldCount = columnNames.size(); + this.serDe = new CarbonHiveSerDe(); + serDe.initialize(configuration, properties); + this.tableInspector = (ArrayWritableObjectInspector) serDe.getObjectInspector(); + + this.structFields = + ImmutableList.copyOf(inputColumnNames.stream().map(tableInspector::getStructFieldRef) + .collect(toImmutableList())); + + this.row = tableInspector.create(); + + this.setters = new HiveWriteUtils.FieldSetter[structFields.size()]; + for (int i = 0; i < setters.length; i++) { + setters[i] = HiveWriteUtils.createFieldSetter(tableInspector, row, structFields.get(i), + fileColumnTypes.get(structFields.get(i).getFieldID())); + } + String encodedLoadModel = this.configuration.get(CarbonTableConfig.CARBON_PRESTO_LOAD_MODEL); + if (StringUtils.isNotEmpty(encodedLoadModel)) { + this.configuration.set(CarbonTableOutputFormat.LOAD_MODEL, encodedLoadModel); + } + try { + boolean compress = HiveConf.getBoolVar(this.configuration, COMPRESSRESULT); + Object writer = + Class.forName(MapredCarbonOutputFormat.class.getName()).getConstructor().newInstance(); + this.recordWriter = ((MapredCarbonOutputFormat<?>) writer) + .getHiveRecordWriter(this.configuration, this.outPutPath, Text.class, compress, + properties, Reporter.NULL); + } catch (Exception e) { + LOG.error("error while initializing writer", e); + throw new RuntimeException("writer class not found"); + } + } + + @Override + public long getWrittenBytes() { + if (isCommitDone) { + try { + return 
outPutPath.getFileSystem(configuration).getFileStatus(outPutPath).getLen(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + return 0; + } + + @Override + public long getSystemMemoryUsage() { + return 0; Review comment: better to throw unsupported error for `getSystemMemoryUsage` and `getValidationCpuNanos`, instead of sending 0 which results in wrong metrics? ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataLocationService.java ########## @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto; + +import com.google.inject.Inject; +import io.prestosql.plugin.hive.HdfsEnvironment; +import io.prestosql.plugin.hive.HiveLocationService; +import io.prestosql.plugin.hive.HiveWriteUtils; +import io.prestosql.plugin.hive.LocationHandle; +import io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore; +import io.prestosql.plugin.hive.metastore.Table; +import io.prestosql.spi.connector.ConnectorSession; +import org.apache.hadoop.fs.Path; + +public class CarbonDataLocationService extends HiveLocationService { + + private final HdfsEnvironment hdfsEnvironment; + + @Inject + public CarbonDataLocationService(HdfsEnvironment hdfsEnvironment) { + super(hdfsEnvironment); + this.hdfsEnvironment = hdfsEnvironment; + } + + @Override + public LocationHandle forNewTable(SemiTransactionalHiveMetastore metastore, + ConnectorSession session, String schemaName, String tableName) { + // TODO: test in cloud scenario in S3/OBS and make it compatible for cloud scenario + super.forNewTable(metastore, session, schemaName, tableName); + HdfsEnvironment.HdfsContext context = + new HdfsEnvironment.HdfsContext(session, schemaName, tableName); + Path targetPath = HiveWriteUtils + .getTableDefaultLocation(context, metastore, this.hdfsEnvironment, schemaName, tableName); + return new LocationHandle(targetPath, targetPath, false, + LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY); + } + + @Override + public LocationHandle forExistingTable(SemiTransactionalHiveMetastore metastore, + ConnectorSession session, Table table) { + // TODO: test in cloud scenario in S3/OBS and make it compatible for cloud scenario Review comment: Is the testing on S3 completed? If not, please test and handle it in this PR. 
########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbondataModule.java ########## @@ -127,7 +127,8 @@ public void configure(Binder binder) { .in(Scopes.SINGLETON); binder.bind(HivePartitionManager.class).in(Scopes.SINGLETON); binder.bind(LocationService.class).to(HiveLocationService.class).in(Scopes.SINGLETON); - binder.bind(HiveMetadataFactory.class).in(Scopes.SINGLETON); + binder.bind(HiveLocationService.class).to(CarbonDataLocationService.class).in(Scopes.SINGLETON); Review comment: Could you please make one small interaction diagram that contains which all Hive classes carbon extends to support insert into (Similar to what we have for query flow) and upload it in JIRA ? ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataFileWriter.java ########## @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.hadoop.api.CarbonTableOutputFormat; +import org.apache.carbondata.hive.CarbonHiveSerDe; +import org.apache.carbondata.hive.MapredCarbonOutputFormat; +import org.apache.carbondata.presto.impl.CarbonTableConfig; + +import com.google.common.collect.ImmutableList; +import io.prestosql.plugin.hive.HiveFileWriter; Review comment: Can you support prestodb as well, not just prestosql? I think the same non-common classes have to be copied there too. ########## File path: integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoInsertIntoTableTestCase.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto.integrationtest + +import java.io.File +import java.util +import java.util.UUID +import java.util.concurrent.{Callable, Executor, Executors, Future} + +import scala.collection.JavaConverters._ + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter} +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.schema.SchemaReader +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.statusmanager.SegmentStatusManager +import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} +import org.apache.carbondata.presto.server.PrestoServer +import org.apache.carbondata.presto.util.CarbonDataStoreCreator + +class PrestoInsertIntoTableTestCase extends FunSuiteLike with BeforeAndAfterAll with BeforeAndAfterEach { + + private val logger = LogServiceFactory + .getLogService(classOf[PrestoAllDataTypeTest].getCanonicalName) + + private val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + private val storePath = s"$rootPath/integration/presto/target/store" + private val prestoServer = new PrestoServer + private val executorService = Executors.newFixedThreadPool(1) + + override def beforeAll: Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, + "Presto") + val map = new util.HashMap[String, String]() + map.put("hive.metastore", "file") + map.put("hive.metastore.catalog.dir", s"file://$storePath") + map.put("hive.allow-drop-table", "true") + prestoServer.startServer("testdb", map) + prestoServer.execute("drop schema if 
exists testdb") + prestoServer.execute("create schema testdb") + } + + override protected def beforeEach(): Unit = { + val query = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + createTable(query, "testdb", "testtable") + } + + private def createTable(query: String, databaseName: String, tableName: String): Unit = { + prestoServer.execute(s"drop table if exists ${databaseName}.${tableName}") + prestoServer.execute(query) + logger.info("Creating The Carbon Store") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier(databaseName, tableName) + CarbonDataStoreCreator.createTable(absoluteTableIdentifier, true) + logger.info(s"\nCarbon store is created at location: $storePath") + } + + private def getAbsoluteIdentifier(dbName: String, + tableName: String) = { + val absoluteTableIdentifier = AbsoluteTableIdentifier.from( + storePath + "/" + dbName + "/" + tableName, + new CarbonTableIdentifier(dbName, + tableName, + UUID.randomUUID().toString)) + absoluteTableIdentifier + } + + test("test insert with different storage format names") { + val query1 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + val query2 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBON') " + val query3 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype 
varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='ORG.APACHE.CARBONDATA.FORMAT') " + createTable(query1, "testdb", "testtable") + createTable(query2, "testdb", "testtable") + createTable(query3, "testdb", "testtable") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val segmentPath = CarbonTablePath.getSegmentPath(carbonTable.getTablePath, "0") + assert(FileFactory.getCarbonFile(segmentPath).isFileExist) + } + + test("test insert into one segment and check folder structure") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val tablePath = carbonTable.getTablePath + val segment0Path = CarbonTablePath.getSegmentPath(tablePath, "0") + val segment1Path = CarbonTablePath.getSegmentPath(tablePath, "1") + val segment0 = FileFactory.getCarbonFile(segment0Path) + assert(segment0.isFileExist) + assert(segment0.listFiles(new CarbonFileFilter { + override def accept(file: CarbonFile): Boolean = { + file.getName.endsWith(CarbonTablePath.CARBON_DATA_EXT) || + 
file.getName.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT) + } + }).length == 2) + val segment1 = FileFactory.getCarbonFile(segment1Path) + assert(segment1.isFileExist) + assert(segment1.listFiles(new CarbonFileFilter { + override def accept(file: CarbonFile): Boolean = { + file.getName.endsWith(CarbonTablePath.CARBON_DATA_EXT) || + file.getName.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT) + } + }).length == 2) + val segmentsPath = CarbonTablePath.getSegmentFilesLocation(tablePath) + assert(FileFactory.getCarbonFile(segmentsPath).isFileExist && FileFactory.getCarbonFile(segmentsPath).listFiles(true).size() == 2) + val metadataFolderPath = CarbonTablePath.getMetadataPath(tablePath) + FileFactory.getCarbonFile(metadataFolderPath).listFiles(new CarbonFileFilter { + override def accept(file: CarbonFile): Boolean = { + file.getName.endsWith(CarbonTablePath.TABLE_STATUS_FILE) + } + }) + } + + test("test insert into many segments and check segment count and data count") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1998-12-16 10:12:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1998-12-16 10:12:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val segmentFoldersLocation = 
CarbonTablePath.getPartitionDir(carbonTable.getTablePath) + assert(FileFactory.getCarbonFile(segmentFoldersLocation).listFiles(false).size() == 8) + val actualResult1: List[Map[String, Any]] = prestoServer + .executeQuery("select count(*) AS RESULT from testdb.testtable") + val expectedResult1: List[Map[String, Any]] = List(Map("RESULT" -> 4)) + assert(actualResult1.equals(expectedResult1)) + // filter query + val actualResult2: List[Map[String, Any]] = prestoServer + .executeQuery("select count(*) AS RESULT from testdb.testtable WHERE dob = timestamp '1998-12-16 10:12:09'") + val expectedResult2: List[Map[String, Any]] = List(Map("RESULT" -> 2)) + assert(actualResult2.equals(expectedResult2)) + } + + test("test if the table status contains the segment file name for each load") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val ssm = new SegmentStatusManager(carbonTable.getAbsoluteTableIdentifier) + ssm.getValidAndInvalidSegments.getValidSegments.asScala.foreach { segment => + val loadMetadataDetails = segment.getLoadMetadataDetails + assert(loadMetadataDetails.getSegmentFile != null) + } + } + + test("test for query when insert in progress") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val query = "insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)" + val asyncQuery = runSqlAsync(query) + val actualResult1: List[Map[String, Any]] = 
prestoServer.executeQuery("select count(*) AS RESULT from testdb.testtable WHERE dob = timestamp '1994-06-14 05:00:09'") + val expectedResult1: List[Map[String, Any]] = List(Map("RESULT" -> 1)) + assert(actualResult1.equals(expectedResult1)) + assert(asyncQuery.get().equalsIgnoreCase("PASS")) + val actualResult2: List[Map[String, Any]] = prestoServer.executeQuery("select count(*) AS RESULT from testdb.testtable WHERE dob = timestamp '1994-06-14 05:00:09'") + val expectedResult2: List[Map[String, Any]] = List(Map("RESULT" -> 2)) + assert(actualResult2.equals(expectedResult2)) + } + + class QueryTask(query: String) extends Callable[String] { + override def call(): String = { + var result = "PASS" + try { + prestoServer.execute(query) + } catch { + case ex: Exception => + println(ex.printStackTrace()) + result = "FAIL" + } + result + } + } + + private def runSqlAsync(sql: String): Future[String] = { + val future = executorService.submit( + new QueryTask(sql) + ) + Thread.sleep(2) + future + } + + override def afterAll(): Unit = { Review comment: Please update the documentation to describe the insert syntax, what is supported, and what is still open. ########## File path: integration/hive/src/main/java/org/apache/carbondata/hive/util/HiveCarbonUtil.java ########## @@ -137,7 +137,7 @@ public static CarbonLoadModel getCarbonLoadModel(String tableName, String databa carbonTable = CarbonTable.buildFromTableInfo( SchemaReader.inferSchema(absoluteTableIdentifier, false, configuration)); } - carbonTable.setTransactionalTable(false); + carbonTable.setTransactionalTable(true); Review comment: Hive is not writing the schema file, it supports only non-transactional write. So, setting true will impact Hive. ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataInsertTableHandle.java ########## @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.presto; + +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableMap; +import io.prestosql.plugin.hive.HiveBucketProperty; +import io.prestosql.plugin.hive.HiveColumnHandle; +import io.prestosql.plugin.hive.HiveInsertTableHandle; +import io.prestosql.plugin.hive.HiveStorageFormat; +import io.prestosql.plugin.hive.LocationHandle; +import io.prestosql.plugin.hive.metastore.HivePageSinkMetadata; +import io.prestosql.spi.connector.ConnectorInsertTableHandle; + +import static java.util.Objects.requireNonNull; + +public class CarbonDataInsertTableHandle extends HiveInsertTableHandle implements Review comment: Do we really need CarbonDataHandleResolver and this class ? I don't see any carbon specific changes. May be we can directly use HiveInsertTableHandle in carbondata-presto insert flow ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataLocationService.java ########## @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.presto; + +import com.google.inject.Inject; +import io.prestosql.plugin.hive.HdfsEnvironment; +import io.prestosql.plugin.hive.HiveLocationService; +import io.prestosql.plugin.hive.HiveWriteUtils; +import io.prestosql.plugin.hive.LocationHandle; +import io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore; +import io.prestosql.plugin.hive.metastore.Table; +import io.prestosql.spi.connector.ConnectorSession; +import org.apache.hadoop.fs.Path; + +public class CarbonDataLocationService extends HiveLocationService { Review comment: same suggestion for this also, may be can directly use HiveLocationService ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataFileWriter.java ########## @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.presto; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.hadoop.api.CarbonTableOutputFormat; +import org.apache.carbondata.hive.CarbonHiveSerDe; +import org.apache.carbondata.hive.MapredCarbonOutputFormat; +import org.apache.carbondata.presto.impl.CarbonTableConfig; + +import com.google.common.collect.ImmutableList; +import io.prestosql.plugin.hive.HiveFileWriter; +import io.prestosql.plugin.hive.HiveType; +import io.prestosql.plugin.hive.HiveWriteUtils; +import io.prestosql.spi.Page; +import io.prestosql.spi.PrestoException; +import io.prestosql.spi.block.Block; +import io.prestosql.spi.type.Type; +import io.prestosql.spi.type.TypeManager; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.io.Text; +import 
org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Reporter; +import org.apache.log4j.Logger; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_WRITER_DATA_ERROR; +import static java.util.Objects.requireNonNull; +import static java.util.stream.Collectors.toList; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.COMPRESSRESULT; + +/** + * This class implements HiveFileWriter and it creates the carbonFileWriter to write the page data + * sent from presto. + */ +public class CarbonDataFileWriter implements HiveFileWriter { + + private static final Logger LOG = + LogServiceFactory.getLogService(CarbonDataFileWriter.class.getName()); + + private final JobConf configuration; + private final Path outPutPath; + private final FileSinkOperator.RecordWriter recordWriter; + private final CarbonHiveSerDe serDe; + private final int fieldCount; + private final Object row; + private final SettableStructObjectInspector tableInspector; + private final List<StructField> structFields; + private final HiveWriteUtils.FieldSetter[] setters; + + private boolean isCommitDone; + + public CarbonDataFileWriter(Path outPutPath, List<String> inputColumnNames, Properties properties, + JobConf configuration, TypeManager typeManager) throws SerDeException { + requireNonNull(outPutPath, "path is null"); + // take the outputPath same as location in compliance with the carbon store folder structure. 
+ this.outPutPath = new Path(properties.getProperty("location")); + this.configuration = requireNonNull(configuration, "conf is null"); + List<String> columnNames = Arrays + .asList(properties.getProperty(IOConstants.COLUMNS, "").split(CarbonCommonConstants.COMMA)); + List<Type> fileColumnTypes = + HiveType.toHiveTypes(properties.getProperty(IOConstants.COLUMNS_TYPES, "")).stream() + .map(hiveType -> hiveType.getType(typeManager)).collect(toList()); + this.fieldCount = columnNames.size(); + this.serDe = new CarbonHiveSerDe(); + serDe.initialize(configuration, properties); + this.tableInspector = (ArrayWritableObjectInspector) serDe.getObjectInspector(); + + this.structFields = + ImmutableList.copyOf(inputColumnNames.stream().map(tableInspector::getStructFieldRef) + .collect(toImmutableList())); + + this.row = tableInspector.create(); + + this.setters = new HiveWriteUtils.FieldSetter[structFields.size()]; + for (int i = 0; i < setters.length; i++) { + setters[i] = HiveWriteUtils.createFieldSetter(tableInspector, row, structFields.get(i), + fileColumnTypes.get(structFields.get(i).getFieldID())); + } + String encodedLoadModel = this.configuration.get(CarbonTableConfig.CARBON_PRESTO_LOAD_MODEL); + if (StringUtils.isNotEmpty(encodedLoadModel)) { + this.configuration.set(CarbonTableOutputFormat.LOAD_MODEL, encodedLoadModel); + } + try { + boolean compress = HiveConf.getBoolVar(this.configuration, COMPRESSRESULT); + Object writer = + Class.forName(MapredCarbonOutputFormat.class.getName()).getConstructor().newInstance(); + this.recordWriter = ((MapredCarbonOutputFormat<?>) writer) + .getHiveRecordWriter(this.configuration, this.outPutPath, Text.class, compress, + properties, Reporter.NULL); + } catch (Exception e) { + LOG.error("error while initializing writer", e); + throw new RuntimeException("writer class not found"); + } + } + + @Override + public long getWrittenBytes() { + if (isCommitDone) { + try { + return 
outPutPath.getFileSystem(configuration).getFileStatus(outPutPath).getLen(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + return 0; + } + + @Override + public long getSystemMemoryUsage() { + return 0; + } + + @Override + public void appendRows(Page dataPage) { + for (int position = 0; position < dataPage.getPositionCount(); position++) { + appendRow(dataPage, position); + } + } + + private void appendRow(Page dataPage, int position) { + for (int field = 0; field < fieldCount; field++) { + Block block = dataPage.getBlock(field); + if (block.isNull(position)) { + tableInspector.setStructFieldData(row, structFields.get(field), null); + } else { + setters[field].setField(block, position); + } + } + try { + recordWriter.write(serDe.serialize(row, tableInspector)); + } catch (SerDeException | IOException e) { + throw new PrestoException(HIVE_WRITER_DATA_ERROR, e); Review comment: please check whether need to call `recordWriter.close` for failure case (need to call this, if commit is not called in failure case) ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataPageSinkProvider.java ########## @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.presto; + +import java.util.List; +import java.util.Map; +import java.util.OptionalInt; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.util.concurrent.ListeningExecutorService; +import com.google.inject.Inject; +import io.airlift.event.client.EventClient; +import io.airlift.json.JsonCodec; +import io.airlift.units.DataSize; +import io.prestosql.plugin.hive.HdfsEnvironment; +import io.prestosql.plugin.hive.HiveConfig; +import io.prestosql.plugin.hive.HiveFileWriterFactory; +import io.prestosql.plugin.hive.HivePageSink; +import io.prestosql.plugin.hive.HivePageSinkProvider; +import io.prestosql.plugin.hive.HiveSessionProperties; +import io.prestosql.plugin.hive.HiveWritableTableHandle; +import io.prestosql.plugin.hive.HiveWriterStats; +import io.prestosql.plugin.hive.LocationService; +import io.prestosql.plugin.hive.OrcFileWriterFactory; +import io.prestosql.plugin.hive.PartitionUpdate; +import io.prestosql.plugin.hive.metastore.HiveMetastore; +import io.prestosql.plugin.hive.metastore.HivePageSinkMetadataProvider; +import io.prestosql.plugin.hive.metastore.SortingColumn; +import io.prestosql.spi.NodeManager; +import io.prestosql.spi.PageIndexerFactory; +import io.prestosql.spi.PageSorter; +import io.prestosql.spi.connector.ConnectorInsertTableHandle; +import io.prestosql.spi.connector.ConnectorPageSink; +import io.prestosql.spi.connector.ConnectorSession; +import io.prestosql.spi.connector.ConnectorTransactionHandle; +import io.prestosql.spi.type.TypeManager; + +import static com.google.common.util.concurrent.MoreExecutors.listeningDecorator; +import static io.airlift.concurrent.Threads.daemonThreadsNamed; +import static 
io.prestosql.plugin.hive.metastore.CachingHiveMetastore.memoizeMetastore; +import static java.util.Objects.requireNonNull; +import static java.util.concurrent.Executors.newFixedThreadPool; + +public class CarbonDataPageSinkProvider extends HivePageSinkProvider { + + private final Set<HiveFileWriterFactory> fileWriterFactories; + private final HdfsEnvironment hdfsEnvironment; + private final PageSorter pageSorter; + private final HiveMetastore metastore; + private final PageIndexerFactory pageIndexerFactory; + private final TypeManager typeManager; + private final int maxOpenPartitions; + private final int maxOpenSortFiles; + private final DataSize writerSortBufferSize; + private final boolean immutablePartitions; + private final LocationService locationService; + private final ListeningExecutorService writeVerificationExecutor; + private final JsonCodec<PartitionUpdate> partitionUpdateCodec; + private final NodeManager nodeManager; + private final EventClient eventClient; + private final HiveSessionProperties hiveSessionProperties; + private final HiveWriterStats hiveWriterStats; + private final OrcFileWriterFactory orcFileWriterFactory; + private final long perTransactionMetastoreCacheMaximumSize; + + @Inject + public CarbonDataPageSinkProvider(Set<HiveFileWriterFactory> fileWriterFactories, + HdfsEnvironment hdfsEnvironment, PageSorter pageSorter, HiveMetastore metastore, + PageIndexerFactory pageIndexerFactory, TypeManager typeManager, HiveConfig config, + LocationService locationService, JsonCodec<PartitionUpdate> partitionUpdateCodec, + NodeManager nodeManager, EventClient eventClient, HiveSessionProperties hiveSessionProperties, + HiveWriterStats hiveWriterStats, OrcFileWriterFactory orcFileWriterFactory) { + super(fileWriterFactories, hdfsEnvironment, pageSorter, metastore, pageIndexerFactory, Review comment: I think the initialization of the super class is not needed here, as the super class is not called anywhere. Please check it. 
########## File path: integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoInsertIntoTableTestCase.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.presto.integrationtest + +import java.io.File +import java.util +import java.util.UUID +import java.util.concurrent.{Callable, Executor, Executors, Future} + +import scala.collection.JavaConverters._ + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter} +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.schema.SchemaReader +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.statusmanager.SegmentStatusManager +import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} +import org.apache.carbondata.presto.server.PrestoServer +import 
org.apache.carbondata.presto.util.CarbonDataStoreCreator + +class PrestoInsertIntoTableTestCase extends FunSuiteLike with BeforeAndAfterAll with BeforeAndAfterEach { + + private val logger = LogServiceFactory + .getLogService(classOf[PrestoAllDataTypeTest].getCanonicalName) + + private val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + private val storePath = s"$rootPath/integration/presto/target/store" + private val prestoServer = new PrestoServer + private val executorService = Executors.newFixedThreadPool(1) + + override def beforeAll: Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, Review comment: is it mandatory for user to set CARBON_WRITTEN_BY_APPNAME carbon property ? can we move this in the CarbonDataFileWriter itself ? ########## File path: integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoInsertIntoTableTestCase.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto.integrationtest + +import java.io.File +import java.util +import java.util.UUID +import java.util.concurrent.{Callable, Executor, Executors, Future} + +import scala.collection.JavaConverters._ + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter} +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.schema.SchemaReader +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.statusmanager.SegmentStatusManager +import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} +import org.apache.carbondata.presto.server.PrestoServer +import org.apache.carbondata.presto.util.CarbonDataStoreCreator + +class PrestoInsertIntoTableTestCase extends FunSuiteLike with BeforeAndAfterAll with BeforeAndAfterEach { + + private val logger = LogServiceFactory + .getLogService(classOf[PrestoAllDataTypeTest].getCanonicalName) + + private val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + private val storePath = s"$rootPath/integration/presto/target/store" + private val prestoServer = new PrestoServer + private val executorService = Executors.newFixedThreadPool(1) + + override def beforeAll: Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, + "Presto") + val map = new util.HashMap[String, String]() + map.put("hive.metastore", "file") + map.put("hive.metastore.catalog.dir", s"file://$storePath") + map.put("hive.allow-drop-table", "true") + prestoServer.startServer("testdb", map) + prestoServer.execute("drop schema if 
exists testdb") + prestoServer.execute("create schema testdb") + } + + override protected def beforeEach(): Unit = { + val query = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + createTable(query, "testdb", "testtable") + } + + private def createTable(query: String, databaseName: String, tableName: String): Unit = { + prestoServer.execute(s"drop table if exists ${databaseName}.${tableName}") + prestoServer.execute(query) + logger.info("Creating The Carbon Store") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier(databaseName, tableName) + CarbonDataStoreCreator.createTable(absoluteTableIdentifier, true) + logger.info(s"\nCarbon store is created at location: $storePath") + } + + private def getAbsoluteIdentifier(dbName: String, + tableName: String) = { + val absoluteTableIdentifier = AbsoluteTableIdentifier.from( + storePath + "/" + dbName + "/" + tableName, + new CarbonTableIdentifier(dbName, + tableName, + UUID.randomUUID().toString)) + absoluteTableIdentifier + } + + test("test insert with different storage format names") { + val query1 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + val query2 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBON') " + val query3 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype 
varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='ORG.APACHE.CARBONDATA.FORMAT') " + createTable(query1, "testdb", "testtable") + createTable(query2, "testdb", "testtable") + createTable(query3, "testdb", "testtable") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") Review comment: Which of the 3 tables will this insert go to? For me this test case is a bit confusing. Please improve it. ########## File path: integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoInsertIntoTableTestCase.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto.integrationtest + +import java.io.File +import java.util +import java.util.UUID +import java.util.concurrent.{Callable, Executor, Executors, Future} + +import scala.collection.JavaConverters._ + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter} +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.schema.SchemaReader +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.statusmanager.SegmentStatusManager +import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} +import org.apache.carbondata.presto.server.PrestoServer +import org.apache.carbondata.presto.util.CarbonDataStoreCreator + +class PrestoInsertIntoTableTestCase extends FunSuiteLike with BeforeAndAfterAll with BeforeAndAfterEach { + + private val logger = LogServiceFactory + .getLogService(classOf[PrestoAllDataTypeTest].getCanonicalName) + + private val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + private val storePath = s"$rootPath/integration/presto/target/store" + private val prestoServer = new PrestoServer + private val executorService = Executors.newFixedThreadPool(1) + + override def beforeAll: Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, + "Presto") + val map = new util.HashMap[String, String]() + map.put("hive.metastore", "file") + map.put("hive.metastore.catalog.dir", s"file://$storePath") + map.put("hive.allow-drop-table", "true") + prestoServer.startServer("testdb", map) + prestoServer.execute("drop schema if 
exists testdb") + prestoServer.execute("create schema testdb") + } + + override protected def beforeEach(): Unit = { + val query = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + createTable(query, "testdb", "testtable") + } + + private def createTable(query: String, databaseName: String, tableName: String): Unit = { + prestoServer.execute(s"drop table if exists ${databaseName}.${tableName}") + prestoServer.execute(query) + logger.info("Creating The Carbon Store") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier(databaseName, tableName) + CarbonDataStoreCreator.createTable(absoluteTableIdentifier, true) + logger.info(s"\nCarbon store is created at location: $storePath") + } + + private def getAbsoluteIdentifier(dbName: String, + tableName: String) = { + val absoluteTableIdentifier = AbsoluteTableIdentifier.from( + storePath + "/" + dbName + "/" + tableName, + new CarbonTableIdentifier(dbName, + tableName, + UUID.randomUUID().toString)) + absoluteTableIdentifier + } + + test("test insert with different storage format names") { + val query1 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + val query2 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBON') " + val query3 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype 
varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='ORG.APACHE.CARBONDATA.FORMAT') " + createTable(query1, "testdb", "testtable") + createTable(query2, "testdb", "testtable") + createTable(query3, "testdb", "testtable") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val segmentPath = CarbonTablePath.getSegmentPath(carbonTable.getTablePath, "0") + assert(FileFactory.getCarbonFile(segmentPath).isFileExist) + } + + test("test insert into one segment and check folder structure") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val tablePath = carbonTable.getTablePath + val segment0Path = CarbonTablePath.getSegmentPath(tablePath, "0") + val segment1Path = CarbonTablePath.getSegmentPath(tablePath, "1") + val segment0 = FileFactory.getCarbonFile(segment0Path) + assert(segment0.isFileExist) + assert(segment0.listFiles(new CarbonFileFilter { + override def accept(file: CarbonFile): Boolean = { + file.getName.endsWith(CarbonTablePath.CARBON_DATA_EXT) || + 
file.getName.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT) + } + }).length == 2) + val segment1 = FileFactory.getCarbonFile(segment1Path) + assert(segment1.isFileExist) + assert(segment1.listFiles(new CarbonFileFilter { + override def accept(file: CarbonFile): Boolean = { + file.getName.endsWith(CarbonTablePath.CARBON_DATA_EXT) || + file.getName.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT) + } + }).length == 2) + val segmentsPath = CarbonTablePath.getSegmentFilesLocation(tablePath) + assert(FileFactory.getCarbonFile(segmentsPath).isFileExist && FileFactory.getCarbonFile(segmentsPath).listFiles(true).size() == 2) + val metadataFolderPath = CarbonTablePath.getMetadataPath(tablePath) + FileFactory.getCarbonFile(metadataFolderPath).listFiles(new CarbonFileFilter { + override def accept(file: CarbonFile): Boolean = { + file.getName.endsWith(CarbonTablePath.TABLE_STATUS_FILE) + } + }) + } + + test("test insert into many segments and check segment count and data count") { Review comment: can you please add all the primitive data type and validate the data for one row ? (As from your description, complex is not yet supported) ########## File path: integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoInsertIntoTableTestCase.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.presto.integrationtest + +import java.io.File +import java.util +import java.util.UUID +import java.util.concurrent.{Callable, Executor, Executors, Future} + +import scala.collection.JavaConverters._ + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter} +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.schema.SchemaReader +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.statusmanager.SegmentStatusManager +import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} +import org.apache.carbondata.presto.server.PrestoServer +import org.apache.carbondata.presto.util.CarbonDataStoreCreator + +class PrestoInsertIntoTableTestCase extends FunSuiteLike with BeforeAndAfterAll with BeforeAndAfterEach { + + private val logger = LogServiceFactory + .getLogService(classOf[PrestoAllDataTypeTest].getCanonicalName) + + private val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + private val storePath = s"$rootPath/integration/presto/target/store" + private val prestoServer = new PrestoServer + private val executorService = 
Executors.newFixedThreadPool(1) + + override def beforeAll: Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, + "Presto") + val map = new util.HashMap[String, String]() + map.put("hive.metastore", "file") + map.put("hive.metastore.catalog.dir", s"file://$storePath") + map.put("hive.allow-drop-table", "true") + prestoServer.startServer("testdb", map) + prestoServer.execute("drop schema if exists testdb") + prestoServer.execute("create schema testdb") + } + + override protected def beforeEach(): Unit = { + val query = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + createTable(query, "testdb", "testtable") + } + + private def createTable(query: String, databaseName: String, tableName: String): Unit = { + prestoServer.execute(s"drop table if exists ${databaseName}.${tableName}") + prestoServer.execute(query) + logger.info("Creating The Carbon Store") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier(databaseName, tableName) + CarbonDataStoreCreator.createTable(absoluteTableIdentifier, true) + logger.info(s"\nCarbon store is created at location: $storePath") + } + + private def getAbsoluteIdentifier(dbName: String, + tableName: String) = { + val absoluteTableIdentifier = AbsoluteTableIdentifier.from( + storePath + "/" + dbName + "/" + tableName, + new CarbonTableIdentifier(dbName, + tableName, + UUID.randomUUID().toString)) + absoluteTableIdentifier + } + + test("test insert with different storage format names") { + val query1 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, 
iscurrentemployee boolean) with(format='CARBONDATA') " + val query2 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBON') " + val query3 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='ORG.APACHE.CARBONDATA.FORMAT') " + createTable(query1, "testdb", "testtable") Review comment: we allow same table name with different storage format ?! ########## File path: integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoInsertIntoTableTestCase.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto.integrationtest + +import java.io.File +import java.util +import java.util.UUID +import java.util.concurrent.{Callable, Executor, Executors, Future} + +import scala.collection.JavaConverters._ + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter} +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.schema.SchemaReader +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.statusmanager.SegmentStatusManager +import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} +import org.apache.carbondata.presto.server.PrestoServer +import org.apache.carbondata.presto.util.CarbonDataStoreCreator + +class PrestoInsertIntoTableTestCase extends FunSuiteLike with BeforeAndAfterAll with BeforeAndAfterEach { + + private val logger = LogServiceFactory + .getLogService(classOf[PrestoAllDataTypeTest].getCanonicalName) + + private val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + private val storePath = s"$rootPath/integration/presto/target/store" + private val prestoServer = new PrestoServer + private val executorService = Executors.newFixedThreadPool(1) + + override def beforeAll: Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, + "Presto") + val map = new util.HashMap[String, String]() + map.put("hive.metastore", "file") + map.put("hive.metastore.catalog.dir", s"file://$storePath") + map.put("hive.allow-drop-table", "true") + prestoServer.startServer("testdb", map) + prestoServer.execute("drop schema if 
exists testdb") + prestoServer.execute("create schema testdb") + } + + override protected def beforeEach(): Unit = { + val query = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " Review comment: is it possible to enhance the create table to take options (for table properties) or it is already supported ? ########## File path: integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoInsertIntoTableTestCase.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto.integrationtest + +import java.io.File +import java.util +import java.util.UUID +import java.util.concurrent.{Callable, Executor, Executors, Future} + +import scala.collection.JavaConverters._ + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter} +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.schema.SchemaReader +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.statusmanager.SegmentStatusManager +import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} +import org.apache.carbondata.presto.server.PrestoServer +import org.apache.carbondata.presto.util.CarbonDataStoreCreator + +class PrestoInsertIntoTableTestCase extends FunSuiteLike with BeforeAndAfterAll with BeforeAndAfterEach { + + private val logger = LogServiceFactory + .getLogService(classOf[PrestoAllDataTypeTest].getCanonicalName) + + private val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + private val storePath = s"$rootPath/integration/presto/target/store" + private val prestoServer = new PrestoServer + private val executorService = Executors.newFixedThreadPool(1) + + override def beforeAll: Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, + "Presto") + val map = new util.HashMap[String, String]() + map.put("hive.metastore", "file") + map.put("hive.metastore.catalog.dir", s"file://$storePath") + map.put("hive.allow-drop-table", "true") + prestoServer.startServer("testdb", map) + prestoServer.execute("drop schema if 
exists testdb") + prestoServer.execute("create schema testdb") + } + + override protected def beforeEach(): Unit = { + val query = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + createTable(query, "testdb", "testtable") + } + + private def createTable(query: String, databaseName: String, tableName: String): Unit = { + prestoServer.execute(s"drop table if exists ${databaseName}.${tableName}") + prestoServer.execute(query) + logger.info("Creating The Carbon Store") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier(databaseName, tableName) + CarbonDataStoreCreator.createTable(absoluteTableIdentifier, true) + logger.info(s"\nCarbon store is created at location: $storePath") + } + + private def getAbsoluteIdentifier(dbName: String, + tableName: String) = { + val absoluteTableIdentifier = AbsoluteTableIdentifier.from( + storePath + "/" + dbName + "/" + tableName, + new CarbonTableIdentifier(dbName, + tableName, + UUID.randomUUID().toString)) + absoluteTableIdentifier + } + + test("test insert with different storage format names") { + val query1 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + val query2 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBON') " + val query3 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype 
varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='ORG.APACHE.CARBONDATA.FORMAT') " + createTable(query1, "testdb", "testtable") + createTable(query2, "testdb", "testtable") + createTable(query3, "testdb", "testtable") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val segmentPath = CarbonTablePath.getSegmentPath(carbonTable.getTablePath, "0") + assert(FileFactory.getCarbonFile(segmentPath).isFileExist) + } + + test("test insert into one segment and check folder structure") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val tablePath = carbonTable.getTablePath + val segment0Path = CarbonTablePath.getSegmentPath(tablePath, "0") + val segment1Path = CarbonTablePath.getSegmentPath(tablePath, "1") + val segment0 = FileFactory.getCarbonFile(segment0Path) + assert(segment0.isFileExist) + assert(segment0.listFiles(new CarbonFileFilter { + override def accept(file: CarbonFile): Boolean = { + file.getName.endsWith(CarbonTablePath.CARBON_DATA_EXT) || + 
file.getName.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT) + } + }).length == 2) + val segment1 = FileFactory.getCarbonFile(segment1Path) + assert(segment1.isFileExist) + assert(segment1.listFiles(new CarbonFileFilter { + override def accept(file: CarbonFile): Boolean = { + file.getName.endsWith(CarbonTablePath.CARBON_DATA_EXT) || + file.getName.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT) + } + }).length == 2) + val segmentsPath = CarbonTablePath.getSegmentFilesLocation(tablePath) + assert(FileFactory.getCarbonFile(segmentsPath).isFileExist && FileFactory.getCarbonFile(segmentsPath).listFiles(true).size() == 2) + val metadataFolderPath = CarbonTablePath.getMetadataPath(tablePath) + FileFactory.getCarbonFile(metadataFolderPath).listFiles(new CarbonFileFilter { + override def accept(file: CarbonFile): Boolean = { + file.getName.endsWith(CarbonTablePath.TABLE_STATUS_FILE) + } + }) + } + + test("test insert into many segments and check segment count and data count") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1998-12-16 10:12:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1998-12-16 10:12:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val segmentFoldersLocation = 
CarbonTablePath.getPartitionDir(carbonTable.getTablePath) + assert(FileFactory.getCarbonFile(segmentFoldersLocation).listFiles(false).size() == 8) + val actualResult1: List[Map[String, Any]] = prestoServer + .executeQuery("select count(*) AS RESULT from testdb.testtable") + val expectedResult1: List[Map[String, Any]] = List(Map("RESULT" -> 4)) + assert(actualResult1.equals(expectedResult1)) + // filter query + val actualResult2: List[Map[String, Any]] = prestoServer + .executeQuery("select count(*) AS RESULT from testdb.testtable WHERE dob = timestamp '1998-12-16 10:12:09'") + val expectedResult2: List[Map[String, Any]] = List(Map("RESULT" -> 2)) + assert(actualResult2.equals(expectedResult2)) + } + + test("test if the table status contains the segment file name for each load") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val ssm = new SegmentStatusManager(carbonTable.getAbsoluteTableIdentifier) + ssm.getValidAndInvalidSegments.getValidSegments.asScala.foreach { segment => + val loadMetadataDetails = segment.getLoadMetadataDetails + assert(loadMetadataDetails.getSegmentFile != null) + } + } + + test("test for query when insert in progress") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val query = "insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)" + val asyncQuery = runSqlAsync(query) + val actualResult1: List[Map[String, Any]] = 
prestoServer.executeQuery("select count(*) AS RESULT from testdb.testtable WHERE dob = timestamp '1994-06-14 05:00:09'") + val expectedResult1: List[Map[String, Any]] = List(Map("RESULT" -> 1)) + assert(actualResult1.equals(expectedResult1)) + assert(asyncQuery.get().equalsIgnoreCase("PASS")) + val actualResult2: List[Map[String, Any]] = prestoServer.executeQuery("select count(*) AS RESULT from testdb.testtable WHERE dob = timestamp '1994-06-14 05:00:09'") + val expectedResult2: List[Map[String, Any]] = List(Map("RESULT" -> 2)) + assert(actualResult2.equals(expectedResult2)) + } + + class QueryTask(query: String) extends Callable[String] { + override def call(): String = { + var result = "PASS" + try { + prestoServer.execute(query) + } catch { + case ex: Exception => + println(ex.printStackTrace()) + result = "FAIL" + } + result + } + } + + private def runSqlAsync(sql: String): Future[String] = { + val future = executorService.submit( + new QueryTask(sql) + ) + Thread.sleep(2) + future + } + + override def afterAll(): Unit = { Review comment: In the cluster please test concurrent insert ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508236769 ########## File path: integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonOutputFormat.java ########## @@ -92,6 +95,14 @@ public void checkOutputSpecs(FileSystem fileSystem, JobConf jobConf) throws IOEx } String tablePath = FileFactory.getCarbonFile(carbonLoadModel.getTablePath()).getAbsolutePath(); TaskAttemptID taskAttemptID = TaskAttemptID.forName(jc.get("mapred.task.id")); + // taskAttemptID will be null when the insert job is fired from presto. Presto send the JobConf + // and since presto does not use the MR framework for execution, the mapred.task.id will be + // null, so prepare a new ID. + if (taskAttemptID == null) { + SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmm"); + String jobTrackerId = formatter.format(new Date()); + taskAttemptID = new TaskAttemptID(jobTrackerId, 0, TaskType.MAP, 0, 0); Review comment: Here `taskAttemptID` is a `TaskAttemptID` object. Since a new task is created for every writer, there should be no problem. We get the JobConf from Presto and prepare the task attempt ID only to initialize and close the writer, so it should be fine, I guess. What do you think? With respect to the ORC writer: ORC uses a different `FileOutputFormat`, from the `mapred` package, whereas we use the `mapreduce` package. In `mapred` the task context is not used, so they do not need this. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508264164 ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataInsertTableHandle.java ########## @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.presto; + +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableMap; +import io.prestosql.plugin.hive.HiveBucketProperty; +import io.prestosql.plugin.hive.HiveColumnHandle; +import io.prestosql.plugin.hive.HiveInsertTableHandle; +import io.prestosql.plugin.hive.HiveStorageFormat; +import io.prestosql.plugin.hive.LocationHandle; +import io.prestosql.plugin.hive.metastore.HivePageSinkMetadata; +import io.prestosql.spi.connector.ConnectorInsertTableHandle; + +import static java.util.Objects.requireNonNull; + +public class CarbonDataInsertTableHandle extends HiveInsertTableHandle implements Review comment: yes, we need this. 
This is because we need to send the load model, which we prepared in `carbondataMetadata` `beginInsert()`, to all the workers during writing. There is no existing place where we can add the load model and send it to the workers. So we need `CarbonDataInsertTableHandle`, where we add the load model to `additionalConf` and send it to the workers to support transactions. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508265258 ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataFileWriter.java ########## @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.hadoop.api.CarbonTableOutputFormat; +import org.apache.carbondata.hive.CarbonHiveSerDe; +import org.apache.carbondata.hive.MapredCarbonOutputFormat; +import org.apache.carbondata.presto.impl.CarbonTableConfig; + +import com.google.common.collect.ImmutableList; +import io.prestosql.plugin.hive.HiveFileWriter; +import io.prestosql.plugin.hive.HiveType; +import io.prestosql.plugin.hive.HiveWriteUtils; +import io.prestosql.spi.Page; +import io.prestosql.spi.PrestoException; +import io.prestosql.spi.block.Block; +import io.prestosql.spi.type.Type; +import io.prestosql.spi.type.TypeManager; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Reporter; +import org.apache.log4j.Logger; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_WRITER_DATA_ERROR; +import static java.util.Objects.requireNonNull; +import static java.util.stream.Collectors.toList; +import static 
org.apache.hadoop.hive.conf.HiveConf.ConfVars.COMPRESSRESULT; + +/** + * This class implements HiveFileWriter and it creates the carbonFileWriter to write the page data + * sent from presto. + */ +public class CarbonDataFileWriter implements HiveFileWriter { + + private static final Logger LOG = + LogServiceFactory.getLogService(CarbonDataFileWriter.class.getName()); + + private final JobConf configuration; + private final Path outPutPath; + private final FileSinkOperator.RecordWriter recordWriter; + private final CarbonHiveSerDe serDe; + private final int fieldCount; + private final Object row; + private final SettableStructObjectInspector tableInspector; + private final List<StructField> structFields; + private final HiveWriteUtils.FieldSetter[] setters; + + private boolean isCommitDone; + + public CarbonDataFileWriter(Path outPutPath, List<String> inputColumnNames, Properties properties, + JobConf configuration, TypeManager typeManager) throws SerDeException { + requireNonNull(outPutPath, "path is null"); + // take the outputPath same as location in compliance with the carbon store folder structure. 
+ this.outPutPath = new Path(properties.getProperty("location")); + this.configuration = requireNonNull(configuration, "conf is null"); + List<String> columnNames = Arrays + .asList(properties.getProperty(IOConstants.COLUMNS, "").split(CarbonCommonConstants.COMMA)); + List<Type> fileColumnTypes = + HiveType.toHiveTypes(properties.getProperty(IOConstants.COLUMNS_TYPES, "")).stream() + .map(hiveType -> hiveType.getType(typeManager)).collect(toList()); + this.fieldCount = columnNames.size(); + this.serDe = new CarbonHiveSerDe(); + serDe.initialize(configuration, properties); + this.tableInspector = (ArrayWritableObjectInspector) serDe.getObjectInspector(); + + this.structFields = + ImmutableList.copyOf(inputColumnNames.stream().map(tableInspector::getStructFieldRef) + .collect(toImmutableList())); + + this.row = tableInspector.create(); + + this.setters = new HiveWriteUtils.FieldSetter[structFields.size()]; + for (int i = 0; i < setters.length; i++) { + setters[i] = HiveWriteUtils.createFieldSetter(tableInspector, row, structFields.get(i), + fileColumnTypes.get(structFields.get(i).getFieldID())); + } + String encodedLoadModel = this.configuration.get(CarbonTableConfig.CARBON_PRESTO_LOAD_MODEL); + if (StringUtils.isNotEmpty(encodedLoadModel)) { + this.configuration.set(CarbonTableOutputFormat.LOAD_MODEL, encodedLoadModel); + } + try { + boolean compress = HiveConf.getBoolVar(this.configuration, COMPRESSRESULT); + Object writer = + Class.forName(MapredCarbonOutputFormat.class.getName()).getConstructor().newInstance(); + this.recordWriter = ((MapredCarbonOutputFormat<?>) writer) + .getHiveRecordWriter(this.configuration, this.outPutPath, Text.class, compress, + properties, Reporter.NULL); + } catch (Exception e) { + LOG.error("error while initializing writer", e); + throw new RuntimeException("writer class not found"); + } + } + + @Override + public long getWrittenBytes() { + if (isCommitDone) { + try { + return 
outPutPath.getFileSystem(configuration).getFileStatus(outPutPath).getLen(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + return 0; + } + + @Override + public long getSystemMemoryUsage() { + return 0; Review comment: Actually, whoever implements `HiveFileWriter` needs to implement these methods and can't throw exceptions, because `Hivewriterfactory` calls these methods for each registered writer; if we throw exceptions here, the insert job will fail. Maybe we need to implement these methods with stats support later, once the insert is finished. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508269521 ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataLocationService.java ########## @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.presto; + +import com.google.inject.Inject; +import io.prestosql.plugin.hive.HdfsEnvironment; +import io.prestosql.plugin.hive.HiveLocationService; +import io.prestosql.plugin.hive.HiveWriteUtils; +import io.prestosql.plugin.hive.LocationHandle; +import io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore; +import io.prestosql.plugin.hive.metastore.Table; +import io.prestosql.spi.connector.ConnectorSession; +import org.apache.hadoop.fs.Path; + +public class CarbonDataLocationService extends HiveLocationService { Review comment: We need `CarbonDataLocationService` because `HiveLocationService` always uses the temporary directory as the store path and then moves to the staging directory, whereas we need `targetPath` and `writePath` to be the same; this way it will also work for S3.
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508270595 ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataLocationService.java ########## @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto; + +import com.google.inject.Inject; +import io.prestosql.plugin.hive.HdfsEnvironment; +import io.prestosql.plugin.hive.HiveLocationService; +import io.prestosql.plugin.hive.HiveWriteUtils; +import io.prestosql.plugin.hive.LocationHandle; +import io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore; +import io.prestosql.plugin.hive.metastore.Table; +import io.prestosql.spi.connector.ConnectorSession; +import org.apache.hadoop.fs.Path; + +public class CarbonDataLocationService extends HiveLocationService { + + private final HdfsEnvironment hdfsEnvironment; + + @Inject + public CarbonDataLocationService(HdfsEnvironment hdfsEnvironment) { + super(hdfsEnvironment); + this.hdfsEnvironment = hdfsEnvironment; + } + + @Override + public LocationHandle forNewTable(SemiTransactionalHiveMetastore metastore, + ConnectorSession session, String schemaName, String tableName) { + // TODO: test in cloud scenario in S3/OBS and make it compatible for cloud scenario + super.forNewTable(metastore, session, schemaName, tableName); + HdfsEnvironment.HdfsContext context = + new HdfsEnvironment.HdfsContext(session, schemaName, tableName); + Path targetPath = HiveWriteUtils + .getTableDefaultLocation(context, metastore, this.hdfsEnvironment, schemaName, tableName); + return new LocationHandle(targetPath, targetPath, false, + LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY); + } + + @Override + public LocationHandle forExistingTable(SemiTransactionalHiveMetastore metastore, + ConnectorSession session, Table table) { + // TODO: test in cloud scenario in S3/OBS and make it compatible for cloud scenario Review comment: Testing on S3 is not completed, as I don't have the environment, so I added a TODO here just for tracking. Since we use the same path for both `target` and `writepath`, it should work on S3 as well. I will verify this if I get an environment, and once it is verified I will remove the comment from here.
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508272165 ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataPageSinkProvider.java ########## @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto; + +import java.util.List; +import java.util.Map; +import java.util.OptionalInt; +import java.util.Set; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.util.concurrent.ListeningExecutorService; +import com.google.inject.Inject; +import io.airlift.event.client.EventClient; +import io.airlift.json.JsonCodec; +import io.airlift.units.DataSize; +import io.prestosql.plugin.hive.HdfsEnvironment; +import io.prestosql.plugin.hive.HiveConfig; +import io.prestosql.plugin.hive.HiveFileWriterFactory; +import io.prestosql.plugin.hive.HivePageSink; +import io.prestosql.plugin.hive.HivePageSinkProvider; +import io.prestosql.plugin.hive.HiveSessionProperties; +import io.prestosql.plugin.hive.HiveWritableTableHandle; +import io.prestosql.plugin.hive.HiveWriterStats; +import io.prestosql.plugin.hive.LocationService; +import io.prestosql.plugin.hive.OrcFileWriterFactory; +import io.prestosql.plugin.hive.PartitionUpdate; +import io.prestosql.plugin.hive.metastore.HiveMetastore; +import io.prestosql.plugin.hive.metastore.HivePageSinkMetadataProvider; +import io.prestosql.plugin.hive.metastore.SortingColumn; +import io.prestosql.spi.NodeManager; +import io.prestosql.spi.PageIndexerFactory; +import io.prestosql.spi.PageSorter; +import io.prestosql.spi.connector.ConnectorInsertTableHandle; +import io.prestosql.spi.connector.ConnectorPageSink; +import io.prestosql.spi.connector.ConnectorSession; +import io.prestosql.spi.connector.ConnectorTransactionHandle; +import io.prestosql.spi.type.TypeManager; + +import static com.google.common.util.concurrent.MoreExecutors.listeningDecorator; +import static io.airlift.concurrent.Threads.daemonThreadsNamed; +import static io.prestosql.plugin.hive.metastore.CachingHiveMetastore.memoizeMetastore; +import static java.util.Objects.requireNonNull; +import static 
java.util.concurrent.Executors.newFixedThreadPool; + +public class CarbonDataPageSinkProvider extends HivePageSinkProvider { + + private final Set<HiveFileWriterFactory> fileWriterFactories; + private final HdfsEnvironment hdfsEnvironment; + private final PageSorter pageSorter; + private final HiveMetastore metastore; + private final PageIndexerFactory pageIndexerFactory; + private final TypeManager typeManager; + private final int maxOpenPartitions; + private final int maxOpenSortFiles; + private final DataSize writerSortBufferSize; + private final boolean immutablePartitions; + private final LocationService locationService; + private final ListeningExecutorService writeVerificationExecutor; + private final JsonCodec<PartitionUpdate> partitionUpdateCodec; + private final NodeManager nodeManager; + private final EventClient eventClient; + private final HiveSessionProperties hiveSessionProperties; + private final HiveWriterStats hiveWriterStats; + private final OrcFileWriterFactory orcFileWriterFactory; + private final long perTransactionMetastoreCacheMaximumSize; + + @Inject + public CarbonDataPageSinkProvider(Set<HiveFileWriterFactory> fileWriterFactories, + HdfsEnvironment hdfsEnvironment, PageSorter pageSorter, HiveMetastore metastore, + PageIndexerFactory pageIndexerFactory, TypeManager typeManager, HiveConfig config, + LocationService locationService, JsonCodec<PartitionUpdate> partitionUpdateCodec, + NodeManager nodeManager, EventClient eventClient, HiveSessionProperties hiveSessionProperties, + HiveWriterStats hiveWriterStats, OrcFileWriterFactory orcFileWriterFactory) { + super(fileWriterFactories, hdfsEnvironment, pageSorter, metastore, pageIndexerFactory, Review comment: Since no default constructor available in `HivePageSinkProvider`, we need to call super. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508273208 ########## File path: integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoInsertIntoTableTestCase.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto.integrationtest + +import java.io.File +import java.util +import java.util.UUID +import java.util.concurrent.{Callable, Executor, Executors, Future} + +import scala.collection.JavaConverters._ + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter} +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.schema.SchemaReader +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.statusmanager.SegmentStatusManager +import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} +import org.apache.carbondata.presto.server.PrestoServer +import org.apache.carbondata.presto.util.CarbonDataStoreCreator + +class PrestoInsertIntoTableTestCase extends FunSuiteLike with BeforeAndAfterAll with BeforeAndAfterEach { + + private val logger = LogServiceFactory + .getLogService(classOf[PrestoAllDataTypeTest].getCanonicalName) + + private val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + private val storePath = s"$rootPath/integration/presto/target/store" + private val prestoServer = new PrestoServer + private val executorService = Executors.newFixedThreadPool(1) + + override def beforeAll: Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, + "Presto") + val map = new util.HashMap[String, String]() + map.put("hive.metastore", "file") + map.put("hive.metastore.catalog.dir", s"file://$storePath") + map.put("hive.allow-drop-table", "true") + prestoServer.startServer("testdb", map) + prestoServer.execute("drop schema if 
exists testdb") + prestoServer.execute("create schema testdb") + } + + override protected def beforeEach(): Unit = { + val query = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + createTable(query, "testdb", "testtable") + } + + private def createTable(query: String, databaseName: String, tableName: String): Unit = { + prestoServer.execute(s"drop table if exists ${databaseName}.${tableName}") + prestoServer.execute(query) + logger.info("Creating The Carbon Store") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier(databaseName, tableName) + CarbonDataStoreCreator.createTable(absoluteTableIdentifier, true) + logger.info(s"\nCarbon store is created at location: $storePath") + } + + private def getAbsoluteIdentifier(dbName: String, + tableName: String) = { + val absoluteTableIdentifier = AbsoluteTableIdentifier.from( + storePath + "/" + dbName + "/" + tableName, + new CarbonTableIdentifier(dbName, + tableName, + UUID.randomUUID().toString)) + absoluteTableIdentifier + } + + test("test insert with different storage format names") { + val query1 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + val query2 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBON') " + val query3 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype 
varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='ORG.APACHE.CARBONDATA.FORMAT') " + createTable(query1, "testdb", "testtable") Review comment: No, these are just test cases to test different format names. If you look at the `createTable()` method, I'm dropping the table first and then creating it. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508280110 ########## File path: integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoInsertIntoTableTestCase.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto.integrationtest + +import java.io.File +import java.util +import java.util.UUID +import java.util.concurrent.{Callable, Executor, Executors, Future} + +import scala.collection.JavaConverters._ + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter} +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.schema.SchemaReader +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.statusmanager.SegmentStatusManager +import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} +import org.apache.carbondata.presto.server.PrestoServer +import org.apache.carbondata.presto.util.CarbonDataStoreCreator + +class PrestoInsertIntoTableTestCase extends FunSuiteLike with BeforeAndAfterAll with BeforeAndAfterEach { + + private val logger = LogServiceFactory + .getLogService(classOf[PrestoAllDataTypeTest].getCanonicalName) + + private val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + private val storePath = s"$rootPath/integration/presto/target/store" + private val prestoServer = new PrestoServer + private val executorService = Executors.newFixedThreadPool(1) + + override def beforeAll: Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, Review comment: Thanks for the reminder. Yes, it is not needed here, so I have removed it. Please check `CarbonMapredOutputFormat`: the static block adds the app name as `hive`, but when it is called from the Presto flow, `mapred.task.id` will be null, and in that case I will override `appname`
with `presto`. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508281094 ########## File path: integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoInsertIntoTableTestCase.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto.integrationtest + +import java.io.File +import java.util +import java.util.UUID +import java.util.concurrent.{Callable, Executor, Executors, Future} + +import scala.collection.JavaConverters._ + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter} +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.schema.SchemaReader +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.statusmanager.SegmentStatusManager +import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} +import org.apache.carbondata.presto.server.PrestoServer +import org.apache.carbondata.presto.util.CarbonDataStoreCreator + +class PrestoInsertIntoTableTestCase extends FunSuiteLike with BeforeAndAfterAll with BeforeAndAfterEach { + + private val logger = LogServiceFactory + .getLogService(classOf[PrestoAllDataTypeTest].getCanonicalName) + + private val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + private val storePath = s"$rootPath/integration/presto/target/store" + private val prestoServer = new PrestoServer + private val executorService = Executors.newFixedThreadPool(1) + + override def beforeAll: Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, + "Presto") + val map = new util.HashMap[String, String]() + map.put("hive.metastore", "file") + map.put("hive.metastore.catalog.dir", s"file://$storePath") + map.put("hive.allow-drop-table", "true") + prestoServer.startServer("testdb", map) + prestoServer.execute("drop schema if 
exists testdb") + prestoServer.execute("create schema testdb") + } + + override protected def beforeEach(): Unit = { + val query = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + createTable(query, "testdb", "testtable") + } + + private def createTable(query: String, databaseName: String, tableName: String): Unit = { + prestoServer.execute(s"drop table if exists ${databaseName}.${tableName}") + prestoServer.execute(query) + logger.info("Creating The Carbon Store") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier(databaseName, tableName) + CarbonDataStoreCreator.createTable(absoluteTableIdentifier, true) + logger.info(s"\nCarbon store is created at location: $storePath") + } + + private def getAbsoluteIdentifier(dbName: String, + tableName: String) = { + val absoluteTableIdentifier = AbsoluteTableIdentifier.from( + storePath + "/" + dbName + "/" + tableName, + new CarbonTableIdentifier(dbName, + tableName, + UUID.randomUUID().toString)) + absoluteTableIdentifier + } + + test("test insert with different storage format names") { + val query1 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + val query2 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBON') " + val query3 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype 
varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='ORG.APACHE.CARBONDATA.FORMAT') " + createTable(query1, "testdb", "testtable") + createTable(query2, "testdb", "testtable") + createTable(query3, "testdb", "testtable") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") Review comment: OK, I have changed it. Now I do an insert after every create table, but the assertions are checked only once, at the end. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508282935 ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataFileWriter.java ########## @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.hadoop.api.CarbonTableOutputFormat; +import org.apache.carbondata.hive.CarbonHiveSerDe; +import org.apache.carbondata.hive.MapredCarbonOutputFormat; +import org.apache.carbondata.presto.impl.CarbonTableConfig; + +import com.google.common.collect.ImmutableList; +import io.prestosql.plugin.hive.HiveFileWriter; Review comment: Yes, you are right, I need to do this for Presto DB also. But I thought that once we finish handling all the comments on the presto-sql changes, I will directly copy them and raise a new PR for Presto DB. That will be easier for both me and the reviewer, as it can be directly merged. Otherwise this will be a huge PR, the review will be difficult, and every comment will have to be handled twice. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508284573 ########## File path: integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoInsertIntoTableTestCase.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto.integrationtest + +import java.io.File +import java.util +import java.util.UUID +import java.util.concurrent.{Callable, Executor, Executors, Future} + +import scala.collection.JavaConverters._ + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter} +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.schema.SchemaReader +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.statusmanager.SegmentStatusManager +import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} +import org.apache.carbondata.presto.server.PrestoServer +import org.apache.carbondata.presto.util.CarbonDataStoreCreator + +class PrestoInsertIntoTableTestCase extends FunSuiteLike with BeforeAndAfterAll with BeforeAndAfterEach { + + private val logger = LogServiceFactory + .getLogService(classOf[PrestoAllDataTypeTest].getCanonicalName) + + private val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + private val storePath = s"$rootPath/integration/presto/target/store" + private val prestoServer = new PrestoServer + private val executorService = Executors.newFixedThreadPool(1) + + override def beforeAll: Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, + "Presto") + val map = new util.HashMap[String, String]() + map.put("hive.metastore", "file") + map.put("hive.metastore.catalog.dir", s"file://$storePath") + map.put("hive.allow-drop-table", "true") + prestoServer.startServer("testdb", map) + prestoServer.execute("drop schema if 
exists testdb") + prestoServer.execute("create schema testdb") + } + + override protected def beforeEach(): Unit = { + val query = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " Review comment: No. We do not support create table yet, so I didn't go deep into the existing behavior or what Presto supports. Once I take up the create table support, I will handle this. The create table feature is already planned after finishing the insert requirement. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508356222 ########## File path: integration/hive/src/main/java/org/apache/carbondata/hive/util/HiveCarbonUtil.java ########## @@ -137,7 +137,7 @@ public static CarbonLoadModel getCarbonLoadModel(String tableName, String databa carbonTable = CarbonTable.buildFromTableInfo( SchemaReader.inferSchema(absoluteTableIdentifier, false, configuration)); } - carbonTable.setTransactionalTable(false); + carbonTable.setTransactionalTable(true); Review comment: done ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508356404 ########## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/CarbonDataFileWriter.java ########## @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.hadoop.api.CarbonTableOutputFormat; +import org.apache.carbondata.hive.CarbonHiveSerDe; +import org.apache.carbondata.hive.MapredCarbonOutputFormat; +import org.apache.carbondata.presto.impl.CarbonTableConfig; + +import com.google.common.collect.ImmutableList; +import io.prestosql.plugin.hive.HiveFileWriter; +import io.prestosql.plugin.hive.HiveType; +import io.prestosql.plugin.hive.HiveWriteUtils; +import io.prestosql.spi.Page; +import io.prestosql.spi.PrestoException; +import io.prestosql.spi.block.Block; +import io.prestosql.spi.type.Type; +import io.prestosql.spi.type.TypeManager; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Reporter; +import org.apache.log4j.Logger; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.prestosql.plugin.hive.HiveErrorCode.HIVE_WRITER_DATA_ERROR; +import static java.util.Objects.requireNonNull; +import static java.util.stream.Collectors.toList; +import static 
org.apache.hadoop.hive.conf.HiveConf.ConfVars.COMPRESSRESULT; + +/** + * This class implements HiveFileWriter and it creates the carbonFileWriter to write the page data + * sent from presto. + */ +public class CarbonDataFileWriter implements HiveFileWriter { + + private static final Logger LOG = + LogServiceFactory.getLogService(CarbonDataFileWriter.class.getName()); + + private final JobConf configuration; + private final Path outPutPath; + private final FileSinkOperator.RecordWriter recordWriter; + private final CarbonHiveSerDe serDe; + private final int fieldCount; + private final Object row; + private final SettableStructObjectInspector tableInspector; + private final List<StructField> structFields; + private final HiveWriteUtils.FieldSetter[] setters; + + private boolean isCommitDone; + + public CarbonDataFileWriter(Path outPutPath, List<String> inputColumnNames, Properties properties, + JobConf configuration, TypeManager typeManager) throws SerDeException { + requireNonNull(outPutPath, "path is null"); + // take the outputPath same as location in compliance with the carbon store folder structure. 
+ this.outPutPath = new Path(properties.getProperty("location")); + this.configuration = requireNonNull(configuration, "conf is null"); + List<String> columnNames = Arrays + .asList(properties.getProperty(IOConstants.COLUMNS, "").split(CarbonCommonConstants.COMMA)); + List<Type> fileColumnTypes = + HiveType.toHiveTypes(properties.getProperty(IOConstants.COLUMNS_TYPES, "")).stream() + .map(hiveType -> hiveType.getType(typeManager)).collect(toList()); + this.fieldCount = columnNames.size(); + this.serDe = new CarbonHiveSerDe(); + serDe.initialize(configuration, properties); + this.tableInspector = (ArrayWritableObjectInspector) serDe.getObjectInspector(); + + this.structFields = + ImmutableList.copyOf(inputColumnNames.stream().map(tableInspector::getStructFieldRef) + .collect(toImmutableList())); + + this.row = tableInspector.create(); + + this.setters = new HiveWriteUtils.FieldSetter[structFields.size()]; + for (int i = 0; i < setters.length; i++) { + setters[i] = HiveWriteUtils.createFieldSetter(tableInspector, row, structFields.get(i), + fileColumnTypes.get(structFields.get(i).getFieldID())); + } + String encodedLoadModel = this.configuration.get(CarbonTableConfig.CARBON_PRESTO_LOAD_MODEL); + if (StringUtils.isNotEmpty(encodedLoadModel)) { + this.configuration.set(CarbonTableOutputFormat.LOAD_MODEL, encodedLoadModel); + } + try { + boolean compress = HiveConf.getBoolVar(this.configuration, COMPRESSRESULT); + Object writer = + Class.forName(MapredCarbonOutputFormat.class.getName()).getConstructor().newInstance(); + this.recordWriter = ((MapredCarbonOutputFormat<?>) writer) + .getHiveRecordWriter(this.configuration, this.outPutPath, Text.class, compress, + properties, Reporter.NULL); + } catch (Exception e) { + LOG.error("error while initializing writer", e); + throw new RuntimeException("writer class not found"); + } + } + + @Override + public long getWrittenBytes() { + if (isCommitDone) { + try { + return 
outPutPath.getFileSystem(configuration).getFileStatus(outPutPath).getLen(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + return 0; + } + + @Override + public long getSystemMemoryUsage() { + return 0; + } + + @Override + public void appendRows(Page dataPage) { + for (int position = 0; position < dataPage.getPositionCount(); position++) { + appendRow(dataPage, position); + } + } + + private void appendRow(Page dataPage, int position) { + for (int field = 0; field < fieldCount; field++) { + Block block = dataPage.getBlock(field); + if (block.isNull(position)) { + tableInspector.setStructFieldData(row, structFields.get(field), null); + } else { + setters[field].setField(block, position); + } + } + try { + recordWriter.write(serDe.serialize(row, tableInspector)); + } catch (SerDeException | IOException e) { + throw new PrestoException(HIVE_WRITER_DATA_ERROR, e); Review comment: handled in rollback ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA1 commented on pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#issuecomment-712726856 Build Failed with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/4531/ ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508357056 ########## File path: integration/presto/src/test/scala/org/apache/carbondata/presto/integrationtest/PrestoInsertIntoTableTestCase.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto.integrationtest + +import java.io.File +import java.util +import java.util.UUID +import java.util.concurrent.{Callable, Executor, Executors, Future} + +import scala.collection.JavaConverters._ + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuiteLike} + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.filesystem.{CarbonFile, CarbonFileFilter} +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.metadata.schema.SchemaReader +import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier} +import org.apache.carbondata.core.statusmanager.SegmentStatusManager +import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} +import org.apache.carbondata.presto.server.PrestoServer +import org.apache.carbondata.presto.util.CarbonDataStoreCreator + +class PrestoInsertIntoTableTestCase extends FunSuiteLike with BeforeAndAfterAll with BeforeAndAfterEach { + + private val logger = LogServiceFactory + .getLogService(classOf[PrestoAllDataTypeTest].getCanonicalName) + + private val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath + private val storePath = s"$rootPath/integration/presto/target/store" + private val prestoServer = new PrestoServer + private val executorService = Executors.newFixedThreadPool(1) + + override def beforeAll: Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, + "Presto") + val map = new util.HashMap[String, String]() + map.put("hive.metastore", "file") + map.put("hive.metastore.catalog.dir", s"file://$storePath") + map.put("hive.allow-drop-table", "true") + prestoServer.startServer("testdb", map) + prestoServer.execute("drop schema if 
exists testdb") + prestoServer.execute("create schema testdb") + } + + override protected def beforeEach(): Unit = { + val query = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + createTable(query, "testdb", "testtable") + } + + private def createTable(query: String, databaseName: String, tableName: String): Unit = { + prestoServer.execute(s"drop table if exists ${databaseName}.${tableName}") + prestoServer.execute(query) + logger.info("Creating The Carbon Store") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier(databaseName, tableName) + CarbonDataStoreCreator.createTable(absoluteTableIdentifier, true) + logger.info(s"\nCarbon store is created at location: $storePath") + } + + private def getAbsoluteIdentifier(dbName: String, + tableName: String) = { + val absoluteTableIdentifier = AbsoluteTableIdentifier.from( + storePath + "/" + dbName + "/" + tableName, + new CarbonTableIdentifier(dbName, + tableName, + UUID.randomUUID().toString)) + absoluteTableIdentifier + } + + test("test insert with different storage format names") { + val query1 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBONDATA') " + val query2 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='CARBON') " + val query3 = "create table testdb.testtable(ID int, date date, country varchar, name varchar, phonetype 
varchar, serialname varchar,salary decimal(6,1), bonus decimal(8,6), monthlyBonus decimal(5,3), dob timestamp, shortField smallint, iscurrentemployee boolean) with(format='ORG.APACHE.CARBONDATA.FORMAT') " + createTable(query1, "testdb", "testtable") + createTable(query2, "testdb", "testtable") + createTable(query3, "testdb", "testtable") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val segmentPath = CarbonTablePath.getSegmentPath(carbonTable.getTablePath, "0") + assert(FileFactory.getCarbonFile(segmentPath).isFileExist) + } + + test("test insert into one segment and check folder structure") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val tablePath = carbonTable.getTablePath + val segment0Path = CarbonTablePath.getSegmentPath(tablePath, "0") + val segment1Path = CarbonTablePath.getSegmentPath(tablePath, "1") + val segment0 = FileFactory.getCarbonFile(segment0Path) + assert(segment0.isFileExist) + assert(segment0.listFiles(new CarbonFileFilter { + override def accept(file: CarbonFile): Boolean = { + file.getName.endsWith(CarbonTablePath.CARBON_DATA_EXT) || + 
file.getName.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT) + } + }).length == 2) + val segment1 = FileFactory.getCarbonFile(segment1Path) + assert(segment1.isFileExist) + assert(segment1.listFiles(new CarbonFileFilter { + override def accept(file: CarbonFile): Boolean = { + file.getName.endsWith(CarbonTablePath.CARBON_DATA_EXT) || + file.getName.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT) + } + }).length == 2) + val segmentsPath = CarbonTablePath.getSegmentFilesLocation(tablePath) + assert(FileFactory.getCarbonFile(segmentsPath).isFileExist && FileFactory.getCarbonFile(segmentsPath).listFiles(true).size() == 2) + val metadataFolderPath = CarbonTablePath.getMetadataPath(tablePath) + FileFactory.getCarbonFile(metadataFolderPath).listFiles(new CarbonFileFilter { + override def accept(file: CarbonFile): Boolean = { + file.getName.endsWith(CarbonTablePath.TABLE_STATUS_FILE) + } + }) + } + + test("test insert into many segments and check segment count and data count") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1998-12-16 10:12:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1998-12-16 10:12:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val segmentFoldersLocation = 
CarbonTablePath.getPartitionDir(carbonTable.getTablePath) + assert(FileFactory.getCarbonFile(segmentFoldersLocation).listFiles(false).size() == 8) + val actualResult1: List[Map[String, Any]] = prestoServer + .executeQuery("select count(*) AS RESULT from testdb.testtable") + val expectedResult1: List[Map[String, Any]] = List(Map("RESULT" -> 4)) + assert(actualResult1.equals(expectedResult1)) + // filter query + val actualResult2: List[Map[String, Any]] = prestoServer + .executeQuery("select count(*) AS RESULT from testdb.testtable WHERE dob = timestamp '1998-12-16 10:12:09'") + val expectedResult2: List[Map[String, Any]] = List(Map("RESULT" -> 2)) + assert(actualResult2.equals(expectedResult2)) + } + + test("test if the table status contains the segment file name for each load") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val absoluteTableIdentifier: AbsoluteTableIdentifier = getAbsoluteIdentifier("testdb", "testtable") + val carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier) + val ssm = new SegmentStatusManager(carbonTable.getAbsoluteTableIdentifier) + ssm.getValidAndInvalidSegments.getValidSegments.asScala.foreach { segment => + val loadMetadataDetails = segment.getLoadMetadataDetails + assert(loadMetadataDetails.getSegmentFile != null) + } + } + + test("test for query when insert in progress") { + prestoServer.execute("insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)") + val query = "insert into testdb.testtable values(10, current_date, 'INDIA', 'Chandler', 'qwerty', 'usn20392',10000.0,16.234567,25.678,timestamp '1994-06-14 05:00:09',smallint '23', true)" + val asyncQuery = runSqlAsync(query) + val actualResult1: List[Map[String, Any]] = 
prestoServer.executeQuery("select count(*) AS RESULT from testdb.testtable WHERE dob = timestamp '1994-06-14 05:00:09'") + val expectedResult1: List[Map[String, Any]] = List(Map("RESULT" -> 1)) + assert(actualResult1.equals(expectedResult1)) + assert(asyncQuery.get().equalsIgnoreCase("PASS")) + val actualResult2: List[Map[String, Any]] = prestoServer.executeQuery("select count(*) AS RESULT from testdb.testtable WHERE dob = timestamp '1994-06-14 05:00:09'") + val expectedResult2: List[Map[String, Any]] = List(Map("RESULT" -> 2)) + assert(actualResult2.equals(expectedResult2)) + } + + class QueryTask(query: String) extends Callable[String] { + override def call(): String = { + var result = "PASS" + try { + prestoServer.execute(query) + } catch { + case ex: Exception => + println(ex.printStackTrace()) + result = "FAIL" + } + result + } + } + + private def runSqlAsync(sql: String): Future[String] = { + val future = executorService.submit( + new QueryTask(sql) + ) + Thread.sleep(2) + future + } + + override def afterAll(): Unit = { Review comment: added in document, need to test in cluster about the concurrent test ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
QiangCai commented on pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#issuecomment-712728179 I am investigating a random CI failure, so I have aborted the CI run for this PR. Once that is done, I will retest this PR. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA1 commented on pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#issuecomment-712729345 Build Failed with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2777/ ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
ajantha-bhat commented on a change in pull request #3875: URL: https://github.com/apache/carbondata/pull/3875#discussion_r508430591 ########## File path: integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonOutputFormat.java ########## @@ -92,6 +95,14 @@ public void checkOutputSpecs(FileSystem fileSystem, JobConf jobConf) throws IOEx } String tablePath = FileFactory.getCarbonFile(carbonLoadModel.getTablePath()).getAbsolutePath(); TaskAttemptID taskAttemptID = TaskAttemptID.forName(jc.get("mapred.task.id")); + // taskAttemptID will be null when the insert job is fired from presto. Presto send the JobConf + // and since presto does not use the MR framework for execution, the mapred.task.id will be + // null, so prepare a new ID. + if (taskAttemptID == null) { + SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmm"); + String jobTrackerId = formatter.format(new Date()); + taskAttemptID = new TaskAttemptID(jobTrackerId, 0, TaskType.MAP, 0, 0); Review comment: OK. If this task number is used in the file name, then in the case of non-transactional concurrent writes, two files can end up with the same file name, leading to many issues. That is why I suggested a UUID. Please check again. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
Free forum by Nabble | Edit this page |