Github user QiangCai commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1970#discussion_r167481586 --- Diff: processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java --- @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.processing.loading.model; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.carbondata.common.Maps; +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException; +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.constants.CarbonLoadOptionConstants; +import org.apache.carbondata.core.util.CarbonProperties; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException; +import org.apache.carbondata.processing.util.CarbonDataProcessorUtil; +import org.apache.carbondata.processing.util.CarbonLoaderUtil; + +import org.apache.commons.lang.StringUtils; +import 
org.apache.hadoop.conf.Configuration; + +@InterfaceAudience.Developer +public class LoadOption { + + private static LogService LOG = LogServiceFactory.getLogService(LoadOption.class.getName()); + + /** + * get data loading options and initialise default value + */ + public static Map<String, String> fillOptionWithDefaultValue( + Map<String, String> options) throws InvalidLoadOptionException { + Map<String, String> optionsFinal = new HashMap<>(); + optionsFinal.put("delimiter", Maps.getOrDefault(options, "delimiter", ",")); + optionsFinal.put("quotechar", Maps.getOrDefault(options, "quotechar", "\"")); + optionsFinal.put("fileheader", Maps.getOrDefault(options, "fileheader", "")); + optionsFinal.put("commentchar", Maps.getOrDefault(options, "commentchar", "#")); + optionsFinal.put("columndict", Maps.getOrDefault(options, "columndict", null)); + + optionsFinal.put( + "escapechar", + CarbonLoaderUtil.getEscapeChar(Maps.getOrDefault(options,"escapechar", "\\"))); + + optionsFinal.put( + "serialization_null_format", + Maps.getOrDefault(options, "serialization_null_format", "\\N")); + + optionsFinal.put( + "bad_records_logger_enable", + Maps.getOrDefault( + options, + "bad_records_logger_enable", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE, + CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE_DEFAULT))); + + String badRecordActionValue = CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, + CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT); + + optionsFinal.put( + "bad_records_action", + Maps.getOrDefault( + options, + "bad_records_action", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_ACTION, + badRecordActionValue))); + + optionsFinal.put( + "is_empty_data_bad_record", + Maps.getOrDefault( + options, + "is_empty_data_bad_record", + CarbonProperties.getInstance().getProperty( + 
CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD, + CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD_DEFAULT))); + + optionsFinal.put( + "skip_empty_line", + Maps.getOrDefault( + options, + "skip_empty_line", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_SKIP_EMPTY_LINE))); + + optionsFinal.put( + "all_dictionary_path", + Maps.getOrDefault(options, "all_dictionary_path", "")); + + optionsFinal.put( + "complex_delimiter_level_1", + Maps.getOrDefault(options,"complex_delimiter_level_1", "\\$")); + + optionsFinal.put( + "complex_delimiter_level_2", + Maps.getOrDefault(options, "complex_delimiter_level_2", "\\:")); + + optionsFinal.put( + "dateformat", + Maps.getOrDefault( + options, + "dateformat", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT, + CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT_DEFAULT))); + + optionsFinal.put( + "timestampformat", + Maps.getOrDefault( + options, + "timestampformat", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_TIMESTAMPFORMAT, + CarbonLoadOptionConstants.CARBON_OPTIONS_TIMESTAMPFORMAT_DEFAULT))); + + optionsFinal.put( + "global_sort_partitions", + Maps.getOrDefault( + options, + "global_sort_partitions", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_GLOBAL_SORT_PARTITIONS, + null))); + + optionsFinal.put("maxcolumns", Maps.getOrDefault(options, "maxcolumns", null)); + + optionsFinal.put( + "batch_sort_size_inmb", + Maps.getOrDefault( + options, + "batch_sort_size_inmb", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_BATCH_SORT_SIZE_INMB, + CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB, + CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB_DEFAULT)))); + + optionsFinal.put( + "bad_record_path", + Maps.getOrDefault( + options, + 
"bad_record_path", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORD_PATH, + CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_BADRECORDS_LOC, + CarbonCommonConstants.CARBON_BADRECORDS_LOC_DEFAULT_VAL)))); + + String useOnePass = Maps.getOrDefault( + options, + "single_pass", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_SINGLE_PASS, + CarbonLoadOptionConstants.CARBON_OPTIONS_SINGLE_PASS_DEFAULT)).trim().toLowerCase(); + + boolean singlePass; + + if (useOnePass.equalsIgnoreCase("true")) { + singlePass = true; + } else { + // when single_pass = false and if either alldictionarypath + // or columnDict is configured the do not allow load + if (StringUtils.isNotEmpty(optionsFinal.get("all_dictionary_path")) || StringUtils --- End diff -- move last StringUtils to next line --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1970#discussion_r167482163 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datetype/DateTypeTest.scala --- @@ -16,10 +16,11 @@ */ package org.apache.carbondata.spark.testsuite.datetype -import org.apache.carbondata.spark.exception.MalformedCarbonCommandException import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll +import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException + --- End diff -- fixed --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1970#discussion_r167482170 --- Diff: core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java --- @@ -708,4 +710,129 @@ public static Boolean checkIfValidLoadInProgress(AbsoluteTableIdentifier absolut } } + private static boolean isLoadDeletionRequired(String metaDataLocation) { + LoadMetadataDetails[] details = SegmentStatusManager.readLoadMetadata(metaDataLocation); + if (details != null && details.length > 0) { + for (LoadMetadataDetails oneRow : details) { + if ((SegmentStatus.MARKED_FOR_DELETE == oneRow.getSegmentStatus() + || SegmentStatus.COMPACTED == oneRow.getSegmentStatus() + || SegmentStatus.INSERT_IN_PROGRESS == oneRow.getSegmentStatus() + || SegmentStatus.INSERT_OVERWRITE_IN_PROGRESS == oneRow.getSegmentStatus()) + && oneRow.getVisibility().equalsIgnoreCase("true")) { + return true; + } + } + } + return false; + } + + /** + * This will update the old table status details before clean files to the latest table status. 
+ * @param oldList + * @param newList + * @return + */ + public static List<LoadMetadataDetails> updateLoadMetadataFromOldToNew( + LoadMetadataDetails[] oldList, LoadMetadataDetails[] newList) { + + List<LoadMetadataDetails> newListMetadata = + new ArrayList<LoadMetadataDetails>(Arrays.asList(newList)); + for (LoadMetadataDetails oldSegment : oldList) { + if ("false".equalsIgnoreCase(oldSegment.getVisibility())) { + newListMetadata.get(newListMetadata.indexOf(oldSegment)).setVisibility("false"); + } + } + return newListMetadata; + } + + private static void writeLoadMetadata(AbsoluteTableIdentifier identifier, + List<LoadMetadataDetails> listOfLoadFolderDetails) throws IOException { + String dataLoadLocation = CarbonTablePath.getTableStatusFilePath(identifier.getTablePath()); + + DataOutputStream dataOutputStream; + Gson gsonObjectToWrite = new Gson(); + BufferedWriter brWriter = null; + + AtomicFileOperations writeOperation = + new AtomicFileOperationsImpl(dataLoadLocation, FileFactory.getFileType(dataLoadLocation)); + + try { + + dataOutputStream = writeOperation.openForWrite(FileWriteOperation.OVERWRITE); + brWriter = new BufferedWriter(new OutputStreamWriter(dataOutputStream, + Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET))); + + String metadataInstance = gsonObjectToWrite.toJson(listOfLoadFolderDetails.toArray()); + brWriter.write(metadataInstance); + } finally { + try { + if (null != brWriter) { + brWriter.flush(); + } + } catch (Exception e) { + LOG.error("error in flushing "); + + } + CarbonUtil.closeStreams(brWriter); + writeOperation.close(); + } + } + + public static void deleteLoadsAndUpdateMetadata( + CarbonTable carbonTable, + boolean isForceDeletion) throws IOException { + if (isLoadDeletionRequired(carbonTable.getMetadataPath())) { + LoadMetadataDetails[] details = + SegmentStatusManager.readLoadMetadata(carbonTable.getMetadataPath()); + AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier(); + ICarbonLock 
carbonTableStatusLock = CarbonLockFactory.getCarbonLockObj( + identifier, LockUsage.TABLE_STATUS_LOCK); + + // Delete marked loads + boolean isUpdationRequired = DeleteLoadFolders.deleteLoadFoldersFromFileSystem( + identifier, --- End diff -- fixed --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1970#discussion_r167482420 --- Diff: processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java --- @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.processing.loading.model; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.carbondata.common.Maps; +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException; +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.constants.CarbonLoadOptionConstants; +import org.apache.carbondata.core.util.CarbonProperties; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException; +import org.apache.carbondata.processing.util.CarbonDataProcessorUtil; +import org.apache.carbondata.processing.util.CarbonLoaderUtil; + +import org.apache.commons.lang.StringUtils; +import 
org.apache.hadoop.conf.Configuration; + +@InterfaceAudience.Developer +public class LoadOption { + + private static LogService LOG = LogServiceFactory.getLogService(LoadOption.class.getName()); + + /** + * get data loading options and initialise default value + */ + public static Map<String, String> fillOptionWithDefaultValue( + Map<String, String> options) throws InvalidLoadOptionException { + Map<String, String> optionsFinal = new HashMap<>(); + optionsFinal.put("delimiter", Maps.getOrDefault(options, "delimiter", ",")); + optionsFinal.put("quotechar", Maps.getOrDefault(options, "quotechar", "\"")); + optionsFinal.put("fileheader", Maps.getOrDefault(options, "fileheader", "")); + optionsFinal.put("commentchar", Maps.getOrDefault(options, "commentchar", "#")); + optionsFinal.put("columndict", Maps.getOrDefault(options, "columndict", null)); + + optionsFinal.put( + "escapechar", + CarbonLoaderUtil.getEscapeChar(Maps.getOrDefault(options,"escapechar", "\\"))); + + optionsFinal.put( + "serialization_null_format", + Maps.getOrDefault(options, "serialization_null_format", "\\N")); + + optionsFinal.put( + "bad_records_logger_enable", + Maps.getOrDefault( + options, + "bad_records_logger_enable", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE, + CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE_DEFAULT))); + + String badRecordActionValue = CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, + CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT); + + optionsFinal.put( + "bad_records_action", + Maps.getOrDefault( + options, + "bad_records_action", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_ACTION, + badRecordActionValue))); + + optionsFinal.put( + "is_empty_data_bad_record", + Maps.getOrDefault( + options, + "is_empty_data_bad_record", + CarbonProperties.getInstance().getProperty( + 
CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD, + CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD_DEFAULT))); + + optionsFinal.put( + "skip_empty_line", + Maps.getOrDefault( + options, + "skip_empty_line", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_SKIP_EMPTY_LINE))); + + optionsFinal.put( + "all_dictionary_path", + Maps.getOrDefault(options, "all_dictionary_path", "")); + + optionsFinal.put( + "complex_delimiter_level_1", + Maps.getOrDefault(options,"complex_delimiter_level_1", "\\$")); + + optionsFinal.put( + "complex_delimiter_level_2", + Maps.getOrDefault(options, "complex_delimiter_level_2", "\\:")); + + optionsFinal.put( + "dateformat", + Maps.getOrDefault( + options, + "dateformat", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT, + CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT_DEFAULT))); + + optionsFinal.put( + "timestampformat", + Maps.getOrDefault( + options, + "timestampformat", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_TIMESTAMPFORMAT, + CarbonLoadOptionConstants.CARBON_OPTIONS_TIMESTAMPFORMAT_DEFAULT))); + + optionsFinal.put( + "global_sort_partitions", + Maps.getOrDefault( + options, + "global_sort_partitions", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_GLOBAL_SORT_PARTITIONS, + null))); + + optionsFinal.put("maxcolumns", Maps.getOrDefault(options, "maxcolumns", null)); + + optionsFinal.put( + "batch_sort_size_inmb", + Maps.getOrDefault( + options, + "batch_sort_size_inmb", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_BATCH_SORT_SIZE_INMB, + CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB, + CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB_DEFAULT)))); + + optionsFinal.put( + "bad_record_path", + Maps.getOrDefault( + options, + 
"bad_record_path", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORD_PATH, + CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_BADRECORDS_LOC, + CarbonCommonConstants.CARBON_BADRECORDS_LOC_DEFAULT_VAL)))); + + String useOnePass = Maps.getOrDefault( + options, + "single_pass", + CarbonProperties.getInstance().getProperty( + CarbonLoadOptionConstants.CARBON_OPTIONS_SINGLE_PASS, + CarbonLoadOptionConstants.CARBON_OPTIONS_SINGLE_PASS_DEFAULT)).trim().toLowerCase(); + + boolean singlePass; + + if (useOnePass.equalsIgnoreCase("true")) { + singlePass = true; + } else { + // when single_pass = false and if either alldictionarypath + // or columnDict is configured the do not allow load + if (StringUtils.isNotEmpty(optionsFinal.get("all_dictionary_path")) || StringUtils --- End diff -- fixed --- |
In reply to this post by qiuchenjian-2
Github user QiangCai commented on the issue:
https://github.com/apache/carbondata/pull/1970 LGTM --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1970 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/3713/ --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on the issue:
https://github.com/apache/carbondata/pull/1970 CI failed due to environment problem in CI machine. merged into carbonstore branch. --- |
In reply to this post by qiuchenjian-2
|
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1970 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/2474/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1970 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/3516/ --- |
Free forum by Nabble | Edit this page |