Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214047186 --- Diff: processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortParameters.java --- @@ -88,6 +88,12 @@ private DataType[] measureDataType; + private DataType[] noDicDataType; + + private DataType[] noDicSortDataType; + + private DataType[] noDicNoSortDataType; --- End diff -- write the usage of each type to get a better understanding --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214055654 --- Diff: processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java --- @@ -320,12 +325,22 @@ public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoa List<CarbonDimension> allDimensions = carbonTable.getDimensions(); int dictDimCount = allDimensions.size() - segmentProperties.getNumberOfNoDictionaryDimension() - segmentProperties.getComplexDimensions().size(); + CarbonColumn[] noDicAndComplexColumns = + new CarbonColumn[segmentProperties.getNumberOfNoDictionaryDimension() + segmentProperties + .getComplexDimensions().size()]; + int noDic = 0; + int noDicAndComp = 0; for (CarbonDimension dim : allDimensions) { if (!dim.isComplex() && !dim.hasEncoding(Encoding.DICTIONARY) && dim.getDataType() == DataTypes.VARCHAR) { // ordinal is set in CarbonTable.fillDimensionsAndMeasuresForTables() varcharDimIdxInNoDict.add(dim.getOrdinal() - dictDimCount); } + if (!dim.hasEncoding(Encoding.DICTIONARY) || dim.isComplex() || null != dim + .getComplexParentDimension()) { + noDicAndComplexColumns[noDicAndComp++] = --- End diff -- Modify the if condition to check only for dictionary encoding: if (!dim.hasEncoding(Encoding.DICTIONARY)) --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214038515 --- Diff: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java --- @@ -976,4 +978,122 @@ public static long getDataBasedOnRestructuredDataType(byte[] data, DataType rest return value; } + /** + * Check if the column is a no dictionary primitive column + * + * @param dataType + * @return + */ + public static boolean isPrimitiveColumn(DataType dataType) { + if (dataType == DataTypes.BOOLEAN || dataType == DataTypes.BYTE || dataType == DataTypes.SHORT + || dataType == DataTypes.INT || dataType == DataTypes.LONG || DataTypes.isDecimal(dataType) + || dataType == DataTypes.FLOAT || dataType == DataTypes.DOUBLE + || dataType == DataTypes.BYTE_ARRAY) { + return true; + } + return false; + } + + /** + * Put the data to unsafe memory + * + * @param dataType + * @param data + * @param baseObject + * @param address + * @param size + * @param sizeInBytes + */ + public static void putDataToUnsafe(DataType dataType, Object data, Object baseObject, --- End diff -- Create a new class CarbonUnsafeUtil and move this method there --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214055038 --- Diff: processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java --- @@ -320,12 +325,22 @@ public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoa List<CarbonDimension> allDimensions = carbonTable.getDimensions(); int dictDimCount = allDimensions.size() - segmentProperties.getNumberOfNoDictionaryDimension() - segmentProperties.getComplexDimensions().size(); + CarbonColumn[] noDicAndComplexColumns = + new CarbonColumn[segmentProperties.getNumberOfNoDictionaryDimension() + segmentProperties + .getComplexDimensions().size()]; + int noDic = 0; --- End diff -- Remove this unused variable --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214049631 --- Diff: processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java --- @@ -450,6 +450,114 @@ public static boolean isHeaderValid(String tableName, String[] csvHeader, return type; } + /** + * Get the no dictionary data types on the table + * + * @param databaseName + * @param tableName + * @return + */ + public static DataType[] getNoDicDataTypes(String databaseName, String tableName) { + CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(databaseName, tableName); + List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName); + int noDicCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { + noDicCount++; + } + } + DataType[] type = new DataType[noDicCount]; + noDicCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { + type[noDicCount++] = dimensions.get(i).getDataType(); + } + } + return type; + } + + /** + * Get the no dictionary sort column mapping of the table + * + * @param databaseName + * @param tableName + * @return + */ + public static boolean[] getNoDicSortColMapping(String databaseName, String tableName) { + CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(databaseName, tableName); + List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName); + List<Integer> noDicIndexes = new ArrayList<>(dimensions.size()); + int noDicCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { + noDicIndexes.add(i); + noDicCount++; + } + } + + boolean[] noDicSortColMapping = new boolean[noDicCount]; + for (int i = 0; i < noDicSortColMapping.length; i++) { + if (dimensions.get(noDicIndexes.get(i)).isSortColumn()) { + noDicSortColMapping[i] = true; + } --- End diff -- same comment as above --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214049718 --- Diff: processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java --- @@ -450,6 +450,114 @@ public static boolean isHeaderValid(String tableName, String[] csvHeader, return type; } + /** + * Get the no dictionary data types on the table + * + * @param databaseName + * @param tableName + * @return + */ + public static DataType[] getNoDicDataTypes(String databaseName, String tableName) { + CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(databaseName, tableName); + List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName); + int noDicCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { + noDicCount++; + } + } + DataType[] type = new DataType[noDicCount]; + noDicCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { + type[noDicCount++] = dimensions.get(i).getDataType(); + } + } + return type; + } + + /** + * Get the no dictionary sort column mapping of the table + * + * @param databaseName + * @param tableName + * @return + */ + public static boolean[] getNoDicSortColMapping(String databaseName, String tableName) { + CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(databaseName, tableName); + List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName); + List<Integer> noDicIndexes = new ArrayList<>(dimensions.size()); + int noDicCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { + noDicIndexes.add(i); + noDicCount++; + } + } + + boolean[] noDicSortColMapping = new boolean[noDicCount]; + for (int i = 0; i < noDicSortColMapping.length; i++) { + if (dimensions.get(noDicIndexes.get(i)).isSortColumn()) { + noDicSortColMapping[i] = true; + } + } + return noDicSortColMapping; + } + + /** 
+ * Get the data types of the no dictionary sort columns + * + * @param databaseName + * @param tableName + * @return + */ + public static DataType[] getNoDicSortDataTypes(String databaseName, String tableName) { + CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(databaseName, tableName); + List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName); + int noDicSortCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY) && dimensions.get(i).isSortColumn()) { + noDicSortCount++; + } + } + DataType[] type = new DataType[noDicSortCount]; + noDicSortCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY) && dimensions.get(i).isSortColumn()) { + type[noDicSortCount++] = dimensions.get(i).getDataType(); + } --- End diff -- same comment as above. Check on the callers and merge all 3 methods into one if possible --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214051061 --- Diff: processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java --- @@ -450,6 +450,114 @@ public static boolean isHeaderValid(String tableName, String[] csvHeader, return type; } + /** + * Get the no dictionary data types on the table + * + * @param databaseName + * @param tableName + * @return + */ + public static DataType[] getNoDicDataTypes(String databaseName, String tableName) { + CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(databaseName, tableName); + List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName); + int noDicCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { + noDicCount++; + } + } + DataType[] type = new DataType[noDicCount]; + noDicCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { + type[noDicCount++] = dimensions.get(i).getDataType(); + } + } + return type; + } + + /** + * Get the no dictionary sort column mapping of the table + * + * @param databaseName + * @param tableName + * @return + */ + public static boolean[] getNoDicSortColMapping(String databaseName, String tableName) { + CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(databaseName, tableName); + List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName); + List<Integer> noDicIndexes = new ArrayList<>(dimensions.size()); + int noDicCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY)) { + noDicIndexes.add(i); + noDicCount++; + } + } + + boolean[] noDicSortColMapping = new boolean[noDicCount]; + for (int i = 0; i < noDicSortColMapping.length; i++) { + if (dimensions.get(noDicIndexes.get(i)).isSortColumn()) { + noDicSortColMapping[i] = true; + } + } + return noDicSortColMapping; + } + + /** 
+ * Get the data types of the no dictionary sort columns + * + * @param databaseName + * @param tableName + * @return + */ + public static DataType[] getNoDicSortDataTypes(String databaseName, String tableName) { + CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(databaseName, tableName); + List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName); + int noDicSortCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY) && dimensions.get(i).isSortColumn()) { + noDicSortCount++; + } + } + DataType[] type = new DataType[noDicSortCount]; + noDicSortCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY) && dimensions.get(i).isSortColumn()) { + type[noDicSortCount++] = dimensions.get(i).getDataType(); + } + } + return type; + } + + /** + * Get the data types of the no dictionary no sort columns + * + * @param databaseName + * @param tableName + * @return + */ + public static DataType[] getNoDicNoSortDataTypes(String databaseName, String tableName) { + CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(databaseName, tableName); + List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName); + int noDicNoSortCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY) && !dimensions.get(i) + .isSortColumn()) { + noDicNoSortCount++; + } + } + DataType[] type = new DataType[noDicNoSortCount]; + noDicNoSortCount = 0; + for (int i = 0; i < dimensions.size(); i++) { + if (!dimensions.get(i).hasEncoding(Encoding.DICTIONARY) && !dimensions.get(i) + .isSortColumn()) { + type[noDicNoSortCount++] = dimensions.get(i).getDataType(); + } + } + return type; --- End diff -- same comment as above --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214085963 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorageForNoDictionary.java --- @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datastore.columnar; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.metadata.datatype.DataType; + +public class BlockIndexerStorageForNoDictionary { + + private short[] rowIdPage; + + private short[] rowIdRlePage; + + private DataType dataType; + + public BlockIndexerStorageForNoDictionary(Object[] dataPage, DataType dataType, + boolean isNoDictionary, boolean isSortRequired) { --- End diff -- isNoDictionary is not required as it this class will be called for No dictionary primitive columns --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214086463 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorageForNoDictionary.java --- @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.core.datastore.columnar; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.metadata.datatype.DataType; + +public class BlockIndexerStorageForNoDictionary { + + private short[] rowIdPage; + + private short[] rowIdRlePage; + + private DataType dataType; + + public BlockIndexerStorageForNoDictionary(Object[] dataPage, DataType dataType, + boolean isNoDictionary, boolean isSortRequired) { + this.dataType = dataType; + ColumnWithRowIdForNoDictionary<Short>[] dataWithRowId = + createColumnWithRowId(dataPage, isNoDictionary); + if (isSortRequired) { + Arrays.sort(dataWithRowId); + } + short[] rowIds = extractDataAndReturnRowId(dataWithRowId, dataPage); + rleEncodeOnRowId(rowIds); + } + + /** + * Create an object with each column array and respective rowId + * + * @return + */ + private ColumnWithRowIdForNoDictionary<Short>[] createColumnWithRowId(Object[] dataPage, + boolean isNoDictionary) { + ColumnWithRowIdForNoDictionary<Short>[] columnWithIndexs = + new ColumnWithRowIdForNoDictionary[dataPage.length]; + if (isNoDictionary) { + for (short i = 0; i < columnWithIndexs.length; i++) { + columnWithIndexs[i] = new ColumnWithRowIdForNoDictionary<>(dataPage[i], i, dataType); + } + } + return columnWithIndexs; + } + + private short[] extractDataAndReturnRowId(ColumnWithRowIdForNoDictionary<Short>[] dataWithRowId, + Object[] dataPage) { + short[] indexes = new short[dataWithRowId.length]; + for (int i = 0; i < indexes.length; i++) { + indexes[i] = dataWithRowId[i].getIndex(); + dataPage[i] = dataWithRowId[i].getColumn(); + } + return indexes; + } + + /** + * It compresses depends up on the sequence numbers. + * [1,2,3,4,6,8,10,11,12,13] is translated to [1,4,6,8,10,13] and [0,6]. 
In + * first array the start and end of sequential numbers and second array + * keeps the indexes of where sequential numbers starts. If there is no + * sequential numbers then the same array it returns with empty second + * array. + * + * @param rowIds + */ + private void rleEncodeOnRowId(short[] rowIds) { --- End diff -- This method is common for BlockIndexStorageForShot please move it to some util class --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214087899 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -73,11 +113,16 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch if (null != localDictionary) { return localDictionary .getDictionaryValue(CarbonUtil.getSurrogateInternal(columnPage.getBytes(rowId), 0, 3)); - } else if (columnType == ColumnType.COMPLEX_PRIMITIVE && this.isAdaptiveComplexPrimitive()) { - if (columnPage.getNullBits().get(rowId)) { + } else if (columnType == ColumnType.COMPLEX_PRIMITIVE --- End diff -- In case of explicitly sorted data, the value will be returned based on invertedIndexReverse — can you please check the same? Please refer to the getRow method in UnsafeFixedLengthDimensionDataChunkStore.java --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214088957 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodingFactory.java --- @@ -81,25 +81,29 @@ public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer metadata.readFields(in); SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata); return new AdaptiveIntegralCodec(metadata.getSchemaDataType(), metadata.getStoreDataType(), - stats).createDecoder(metadata); + stats, encodings.contains(Encoding.INVERTED_INDEX), --- End diff -- Why are we calling encodings.contains(Encoding.INVERTED_INDEX) twice? Can't the same value be reused? --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214089736 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveCodec.java --- @@ -38,17 +40,56 @@ // the data type specified in schema protected final DataType srcDataType; + protected boolean isSort; + protected boolean isInvertedIndex; + protected AdaptiveCodec(DataType srcDataType, DataType targetDataType, - SimpleStatsResult stats) { + SimpleStatsResult stats, boolean isSort, boolean isInvertedIndex) { this.stats = stats; this.srcDataType = srcDataType; this.targetDataType = targetDataType; + this.isSort = isSort; + this.isInvertedIndex = isInvertedIndex; } public DataType getTargetDataType() { return targetDataType; } + public Object[] getPageBasedOnDataType(ColumnPage input) { --- End diff -- what is the purpose of this method?? --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214090412 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveDeltaIntegralCodec.java --- @@ -88,7 +97,32 @@ public ColumnPageEncoder createEncoder(Map<String, String> parameter) { encodedPage = ColumnPage.newPage(input.getColumnSpec(), targetDataType, input.getPageSize()); input.convertValue(converter); - byte[] result = encodedPage.compress(compressor); + result = encodedPage.compress(compressor); + if (isInvertedIndex) { --- End diff -- Can you please move this code to some utility class, as the classes below also have the same logic? --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214090481 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveDeltaIntegralCodec.java --- @@ -103,9 +137,33 @@ protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) { protected List<Encoding> getEncodingList() { List<Encoding> encodings = new ArrayList<>(); encodings.add(Encoding.ADAPTIVE_DELTA_INTEGRAL); + if (isInvertedIndex) { + encodings.add(Encoding.INVERTED_INDEX); + } return encodings; } + @Override + protected void fillLegacyFields(DataChunk2 dataChunk) throws IOException { --- End diff -- Can you please move this code to some utility class, as the classes below also have the same logic? --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214092679 --- Diff: processing/src/main/java/org/apache/carbondata/processing/loading/sort/SortStepRowHandler.java --- @@ -240,8 +258,44 @@ public IntermediateSortTempRow readWithNoSortFieldConvert( return new IntermediateSortTempRow(dictSortDims, noDictSortDims,measure); } + /** + * Read the data from the stream + * + * @param inputStream + * @param idx + * @return + * @throws IOException + */ + private Object readDataFromStream(DataInputStream inputStream, int idx) throws IOException { --- End diff -- Is this refactored code? I think the same code is also present for measures; can you please check and combine them? --- |
In reply to this post by qiuchenjian-2
Github user dhatchayani commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214313592 --- Diff: processing/src/main/java/org/apache/carbondata/processing/loading/sort/SortStepRowHandler.java --- @@ -240,8 +258,44 @@ public IntermediateSortTempRow readWithNoSortFieldConvert( return new IntermediateSortTempRow(dictSortDims, noDictSortDims,measure); } + /** + * Read the data from the stream + * + * @param inputStream + * @param idx + * @return + * @throws IOException + */ + private Object readDataFromStream(DataInputStream inputStream, int idx) throws IOException { --- End diff -- For measures, it will always be packed/unpacked to/from bytebuffer --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2654 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8203/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2654 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/132/ --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214338168 --- Diff: processing/src/main/java/org/apache/carbondata/processing/loading/sort/SortStepRowHandler.java --- @@ -375,6 +454,47 @@ public void writeRawRowAsIntermediateSortTempRowToOutputStream(Object[] row, outputStream.write(rowBuffer.array(), 0, packSize); } + /** + * Write the data to stream + * + * @param data + * @param outputStream + * @param idx + * @throws IOException + */ + private void writeDataToStream(Object data, DataOutputStream outputStream, int idx) + throws IOException { + DataType dataType = noDicSortDataTypes[idx]; + if (null == data) { + outputStream.writeBoolean(false); + return; --- End diff -- Do not use a return statement here; instead, structure the if-else block appropriately --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2654#discussion_r214337384 --- Diff: processing/src/main/java/org/apache/carbondata/processing/loading/sort/SortStepRowHandler.java --- @@ -359,9 +433,14 @@ public void writeRawRowAsIntermediateSortTempRowToOutputStream(Object[] row, // write no-dict & sort for (int idx = 0; idx < this.noDictSortDimCnt; idx++) { - byte[] bytes = (byte[]) row[this.noDictSortDimIdx[idx]]; - outputStream.writeShort(bytes.length); - outputStream.write(bytes); + if (DataTypeUtil.isPrimitiveColumn(noDicSortDataTypes[idx])) { --- End diff -- I can see that at multiple places for every row DataTypeUtil.isPrimitiveColumn is getting used. Please check the load performance impact of this --- |
Free forum by Nabble | Edit this page |