Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227322577

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveIntegralCodec.java ---
@@ -248,6 +266,136 @@ public double decodeDouble(float value) {
   public double decodeDouble(double value) {
     throw new RuntimeException("internal error: " + debugInfo());
   }
+
+  @Override
+  public void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorInfo) {
+    CarbonColumnVector vector = vectorInfo.vector;
+    BitSet nullBits = columnPage.getNullBits();
+    DataType dataType = vector.getType();
+    DataType type = columnPage.getDataType();
+    int pageSize = columnPage.getPageSize();
+    BitSet deletedRows = vectorInfo.deletedRows;
+    fillVector(columnPage, vector, dataType, type, pageSize, vectorInfo);
+    if (deletedRows == null || deletedRows.isEmpty()) {
+      for (int i = nullBits.nextSetBit(0); i >= 0; i = nullBits.nextSetBit(i + 1)) {
+        vector.putNull(i);
+      }
+    }
+  }
+
+  private void fillVector(ColumnPage columnPage, CarbonColumnVector vector, DataType dataType,
--- End diff --

ok

---
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227616722

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/MeasureRawColumnChunk.java ---
@@ -105,6 +106,22 @@ public ColumnPage convertToColumnPageWithOutCache(int index) {
     }
   }
 
+  /**
+   * Convert raw data with specified page number processed to DimensionColumnDataChunk and fill the
+   * vector
+   *
+   * @param pageNumber page number to decode and fill the vector
+   * @param vectorInfo vector to be filled with column page
+   */
+  public void convertToColumnPageAndFillVector(int pageNumber, ColumnVectorInfo vectorInfo) {
+    assert pageNumber < pagesCount;
+    try {
+      chunkReader.decodeColumnPageAndFillVector(this, pageNumber, vectorInfo);
+    } catch (IOException | MemoryException e) {
+      throw new RuntimeException(e);
--- End diff --

Why not throw e directly?

---
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227616799

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/VariableLengthDimensionColumnPage.java ---
@@ -54,10 +75,15 @@ public VariableLengthDimensionColumnPage(byte[] dataChunks, int[] invertedIndex,
     }
     dataChunkStore = DimensionChunkStoreFactory.INSTANCE
         .getDimensionChunkStore(0, isExplicitSorted, numberOfRows, totalSize, dimStoreType,
-            dictionary);
-    dataChunkStore.putArray(invertedIndex, invertedIndexReverse, dataChunks);
+            dictionary, vectorInfo != null);
+    if (vectorInfo != null) {
+      dataChunkStore.fillVector(invertedIndex, invertedIndexReverse, dataChunks, vectorInfo);
+    } else {
+      dataChunkStore.putArray(invertedIndex, invertedIndexReverse, dataChunks);
+    }
   }
+
--- End diff --

remove this

---
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227617004

--- Diff: core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java ---
@@ -1845,6 +1845,18 @@
   public static final int CARBON_MINMAX_ALLOWED_BYTE_COUNT_MIN = 10;
 
   public static final int CARBON_MINMAX_ALLOWED_BYTE_COUNT_MAX = 1000;
 
+  /**
+   * When enabled complete row filters will be handled by carbon in case of vector.
+   * If it is disabled then only page level pruning will be done by carbon and row level filtering
+   * will be done by spark for vector.
+   * There is no change in flow for non-vector based queries.
--- End diff --

Can you also add in which case it is suggested to set it to false, since the default is true?

---
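For readers wanting to try this switch, a minimal sketch of how such a property is toggled through CarbonProperties. The key string below is an assumption for illustration; the actual constant name is truncated out of the diff above, so verify it in CarbonCommonConstants before use.

```java
import org.apache.carbondata.core.util.CarbonProperties;

public class DisableRowFilterPushdown {
  public static void main(String[] args) {
    // Hypothetical key (assumed, not visible in the diff above).
    // With "false", carbon does only page-level pruning and leaves
    // row-level filtering of vectors to Spark.
    CarbonProperties.getInstance()
        .addProperty("carbon.push.rowfilters.for.vector", "false");
  }
}
```

---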
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227617094

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/AbstractNonDictionaryVectorFiller.java ---
@@ -0,0 +1,278 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.datastore.chunk.store.impl.safe;
+
+import java.nio.ByteBuffer;
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
+import org.apache.carbondata.core.util.ByteUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
+
+public abstract class AbstractNonDictionaryVectorFiller {
--- End diff --

For a public class, please add the interface annotation.

---
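What the requested change could look like, as a sketch. It assumes CarbonData's @InterfaceAudience annotations from the carbondata-common module; both the package path and the choice of Internal are assumptions, not part of the PR.

```java
// Sketch only: @InterfaceAudience.Internal marks the public class as not part
// of the user-facing API. Package path assumed from the carbondata-common
// module; verify before use.
import org.apache.carbondata.common.annotations.InterfaceAudience;

@InterfaceAudience.Internal
public abstract class AbstractNonDictionaryVectorFiller {
  // ... class body as in the diff above
}
```

---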
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227617413

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/AbstractNonDictionaryVectorFiller.java ---
@@ -0,0 +1,278 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.datastore.chunk.store.impl.safe;
+
+import java.nio.ByteBuffer;
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
+import org.apache.carbondata.core.util.ByteUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
+
+public abstract class AbstractNonDictionaryVectorFiller {
+
+  protected int lengthSize;
+  protected int numberOfRows;
+
+  public AbstractNonDictionaryVectorFiller(int lengthSize, int numberOfRows) {
+    this.lengthSize = lengthSize;
+    this.numberOfRows = numberOfRows;
+  }
+
+  public abstract void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer);
+
+  public int getLengthFromBuffer(ByteBuffer buffer) {
+    return buffer.getShort();
+  }
+}
+
+class NonDictionaryVectorFillerFactory {
+
+  public static AbstractNonDictionaryVectorFiller getVectorFiller(DataType type, int lengthSize,
+      int numberOfRows) {
+    if (type == DataTypes.STRING) {
+      return new StringVectorFiller(lengthSize, numberOfRows);
+    } else if (type == DataTypes.VARCHAR) {
+      return new LongStringVectorFiller(lengthSize, numberOfRows);
+    } else if (type == DataTypes.TIMESTAMP) {
+      return new TimeStampVectorFiller(lengthSize, numberOfRows);
+    } else if (type == DataTypes.BOOLEAN) {
+      return new BooleanVectorFiller(lengthSize, numberOfRows);
+    } else if (type == DataTypes.SHORT) {
+      return new ShortVectorFiller(lengthSize, numberOfRows);
+    } else if (type == DataTypes.INT) {
+      return new IntVectorFiller(lengthSize, numberOfRows);
+    } else if (type == DataTypes.LONG) {
+      return new LongVectorFiller(lengthSize, numberOfRows);
+    } else {
+      throw new UnsupportedOperationException("Not supported datatype : " + type);
+    }
+
+  }
+
+}
+
+class StringVectorFiller extends AbstractNonDictionaryVectorFiller {
+
+  public StringVectorFiller(int lengthSize, int numberOfRows) {
+    super(lengthSize, numberOfRows);
+  }
+
+  @Override
+  public void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer) {
+    // start position will be used to store the current data position
+    int startOffset = 0;
+    // as first position will be start from length of bytes as data is stored first in the memory
+    // block we need to skip first two bytes this is because first two bytes will be length of the
+    // data which we have to skip
+    int currentOffset = lengthSize;
+    ByteUtil.UnsafeComparer comparator = ByteUtil.UnsafeComparer.INSTANCE;
+    for (int i = 0; i < numberOfRows - 1; i++) {
+      buffer.position(startOffset);
+      startOffset += getLengthFromBuffer(buffer) + lengthSize;
+      int length = startOffset - (currentOffset);
+      if (comparator.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0,
+          CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, currentOffset, length)) {
+        vector.putNull(i);
+      } else {
+        vector.putByteArray(i, currentOffset, length, data);
+      }
+      currentOffset = startOffset + lengthSize;
+    }
+    // Handle last row
+    int length = (data.length - currentOffset);
+    if (comparator.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0,
+        CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, currentOffset, length)) {
+      vector.putNull(numberOfRows - 1);
+    } else {
+      vector.putByteArray(numberOfRows - 1, currentOffset, length, data);
+    }
+  }
+}
+
+class LongStringVectorFiller extends StringVectorFiller {
--- End diff --

There are multiple classes in one file; are these non-public classes only used in this file?

---
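The offset arithmetic in StringVectorFiller can be hard to follow from the inline comments alone. Below is a standalone sketch (not CarbonData code) of the length-value layout it walks: each value is stored as a 2-byte length followed by the value bytes, so the data offset always trails the length offset by lengthSize.

```java
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class LvLayoutDemo {
  public static void main(String[] args) {
    // Build a tiny page holding two values: "ab" and "cde".
    byte[] a = "ab".getBytes(StandardCharsets.UTF_8);
    byte[] b = "cde".getBytes(StandardCharsets.UTF_8);
    ByteBuffer page = ByteBuffer.allocate(2 + a.length + 2 + b.length);
    page.putShort((short) a.length).put(a).putShort((short) b.length).put(b);
    byte[] data = page.array();

    int lengthSize = 2;
    int startOffset = 0;  // always points at a 2-byte length field
    while (startOffset < data.length) {
      ByteBuffer view = ByteBuffer.wrap(data);
      view.position(startOffset);
      int len = view.getShort();                     // read the length prefix
      int currentOffset = startOffset + lengthSize;  // first data byte
      System.out.println(new String(data, currentOffset, len, StandardCharsets.UTF_8));
      startOffset = currentOffset + len;             // next length field
    }
  }
}
```

---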
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227617725

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java ---
@@ -193,6 +193,30 @@ public void convertValue(ColumnPageValueConverter codec) {
     }
   }
 
+  @Override public byte[] getBytePage() {
--- End diff --

move Override to previous line

---
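For clarity, the formatting being requested, shown on a throwaway class:

```java
// Standalone illustration of the review comment: the @Override annotation on
// its own line, instead of inline as in "@Override public byte[] getBytePage() {".
public class OverridePlacement {
  @Override
  public String toString() {
    return "annotation on its own line";
  }
}
```

---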
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227617936

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoderMeta.java ---
@@ -49,6 +49,8 @@
   // Make it protected for RLEEncoderMeta
   protected String compressorName;
 
+  private transient boolean fillCompleteVector;
--- End diff --

add comment for this variable

---
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227618017

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java ---
@@ -176,7 +179,7 @@ private static ColumnPage getDecimalColumnPage(TableSpec.ColumnSpec columnSpec,
     rowOffset.putInt(counter, offset);
 
     VarLengthColumnPageBase page;
-    if (unsafe) {
+    if (unsafe && !meta.isFillCompleteVector()) {
--- End diff --

Many places check like this; can we make a function for it, with a proper name, to make it more readable?

---
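A sketch of the extraction being suggested; the method name is illustrative and not defined by the PR. It would live in a class that can see both the `unsafe` flag and the meta, e.g. VarLengthColumnPageBase:

```java
// Illustrative helper; name and placement are assumptions.
private static boolean useUnsafeStore(boolean unsafe, ColumnPageEncoderMeta meta) {
  // Off-heap (unsafe) staging is skipped when the decoder fills the caller's
  // vector completely, since no intermediate page store is needed then.
  return unsafe && !meta.isFillCompleteVector();
}

// The call site then reads:
// if (useUnsafeStore(unsafe, meta)) { ... }
```

---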
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227618184

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodingFactory.java ---
@@ -66,6 +66,14 @@ public abstract ColumnPageEncoder createEncoder(TableSpec.ColumnSpec columnSpec,
    */
   public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer> encoderMetas,
       String compressor) throws IOException {
+    return createDecoder(encodings, encoderMetas, compressor, false);
+  }
+
+  /**
+   * Return new decoder based on encoder metadata read from file
--- End diff --

In the comment, can you describe what is the behavior when `fullVectorFill` is true?

---
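A sketch of javadoc that would answer this. The described behavior is inferred from the rest of this thread, not taken from the PR, so treat the wording as an assumption:

```java
/**
 * Return new decoder based on encoder metadata read from file.
 *
 * @param fullVectorFill when true, the returned decoder writes decoded values
 *        directly into the caller's CarbonColumnVector without staging an
 *        intermediate ColumnPage; when false, decoding materializes a
 *        ColumnPage as before. (Wording is illustrative, not from the PR.)
 */
```

---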
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227618507

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java ---
@@ -29,6 +31,12 @@
    */
   ColumnPage decode(byte[] input, int offset, int length) throws MemoryException, IOException;
 
+  /**
+   * Apply decoding algorithm on input byte array and fill the vector here.
+   */
+  void decodeAndFillVector(byte[] input, int offset, int length, ColumnVectorInfo vectorInfo,
+      BitSet nullBits, boolean isLVEncoded) throws MemoryException, IOException;
--- End diff --

I feel it is not good to add `isLVEncoded` just for the LV-encoded case; can we pass a more generic parameter, since this is a common interface for all decoders?

---
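One generic alternative, sketched; the names are illustrative and not from the PR:

```java
// Instead of a boolean that only one decoder cares about, an enum (or a small
// context object) can describe the page's physical layout for all decoders.
public enum PageLayoutHint {
  PLAIN,         // values stored directly
  LENGTH_VALUE   // values stored with a length prefix (the isLVEncoded case)
}

// The interface method could then be declared as:
//   void decodeAndFillVector(byte[] input, int offset, int length,
//       ColumnVectorInfo vectorInfo, BitSet nullBits, PageLayoutHint hint)
//       throws MemoryException, IOException;
```

---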
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227618801

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveIntegralCodec.java ---
@@ -248,6 +269,143 @@ public double decodeDouble(float value) {
   public double decodeDouble(double value) {
     throw new RuntimeException("internal error: " + debugInfo());
   }
+
+  @Override
+  public void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorInfo) {
+    CarbonColumnVector vector = vectorInfo.vector;
+    BitSet nullBits = columnPage.getNullBits();
+    DataType vectorDataType = vector.getType();
+    DataType pageDataType = columnPage.getDataType();
+    int pageSize = columnPage.getPageSize();
+    BitSet deletedRows = vectorInfo.deletedRows;
+    fillVector(columnPage, vector, vectorDataType, pageDataType, pageSize, vectorInfo);
+    if (deletedRows == null || deletedRows.isEmpty()) {
+      for (int i = nullBits.nextSetBit(0); i >= 0; i = nullBits.nextSetBit(i + 1)) {
+        vector.putNull(i);
+      }
+    }
+  }
+
+  private void fillVector(ColumnPage columnPage, CarbonColumnVector vector,
+      DataType vectorDataType, DataType pageDataType, int pageSize, ColumnVectorInfo vectorInfo) {
+    if (pageDataType == DataTypes.BOOLEAN || pageDataType == DataTypes.BYTE) {
+      byte[] byteData = columnPage.getBytePage();
+      if (vectorDataType == DataTypes.SHORT) {
+        for (int i = 0; i < pageSize; i++) {
+          vector.putShort(i, (short) byteData[i]);
+        }
+      } else if (vectorDataType == DataTypes.INT) {
+        for (int i = 0; i < pageSize; i++) {
+          vector.putInt(i, (int) byteData[i]);
+        }
+      } else if (vectorDataType == DataTypes.LONG) {
+        for (int i = 0; i < pageSize; i++) {
+          vector.putLong(i, byteData[i]);
+        }
+      } else if (vectorDataType == DataTypes.TIMESTAMP) {
+        for (int i = 0; i < pageSize; i++) {
+          vector.putLong(i, byteData[i] * 1000);
+        }
+      } else if (vectorDataType == DataTypes.BOOLEAN) {
+        vector.putBytes(0, pageSize, byteData, 0);
+
--- End diff --

remove empty line

---
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227619046

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java ---
@@ -198,4 +219,48 @@ void fillColumnVectorDetails(CarbonColumnarBatch columnarBatch, int rowCounter,
     }
   }
 
+  private void collectResultInColumnarBatchDirect(BlockletScannedResult scannedResult,
--- End diff --

add comment for this function

---
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227619247

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java ---
@@ -478,6 +478,17 @@ private BlockExecutionInfo getBlockExecutionInfoForBlock(QueryModel queryModel,
     } else {
       blockExecutionInfo.setPrefetchBlocklet(queryModel.isPreFetchData());
     }
+    // In case of fg datamap it should not go to direct fill.
+    boolean fgDataMapPathPresent = false;
+    for (TableBlockInfo blockInfo : queryModel.getTableBlockInfos()) {
+      fgDataMapPathPresent = blockInfo.getDataMapWriterPath() != null;
+      if (fgDataMapPathPresent) {
+        break;
--- End diff --

Is it possible to call queryModel.setDirectVectorFill directly?

---
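A sketch of the refactor the reviewer hints at, assuming the surrounding method from the diff: set the flag inside the loop and drop the local boolean. The setter name comes from the reviewer's comment, not from the quoted diff.

```java
// Sketch: disable direct fill as soon as any block carries a fine-grained
// datamap writer path, with no intermediate flag variable.
for (TableBlockInfo blockInfo : queryModel.getTableBlockInfos()) {
  if (blockInfo.getDataMapWriterPath() != null) {
    queryModel.setDirectVectorFill(false);
    break;
  }
}
```

---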
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227619567

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/model/QueryModel.java ---
@@ -124,6 +124,11 @@
   private boolean preFetchData = true;
 
+  /**
+   * It fills the vector directly from decoded column page with out any staging and conversions
--- End diff --

Regarding "It fills the vector": can you give more detail about which vector, and describe how Spark/Presto integrate with this?

---
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r227619641

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/result/BlockletScannedResult.java ---
@@ -72,6 +72,11 @@
    */
   private int[] pageFilteredRowCount;
 
+  /**
+   * Filtered pages to be decoded and loaded to vector.
+   */
+  private int[] pagesFiltered;
--- End diff --

```suggestion
  private int[] pagesIdFiltered;
```

---
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228038986

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/MeasureRawColumnChunk.java ---
@@ -105,6 +106,22 @@ public ColumnPage convertToColumnPageWithOutCache(int index) {
     }
   }
 
+  /**
+   * Convert raw data with specified page number processed to DimensionColumnDataChunk and fill the
+   * vector
+   *
+   * @param pageNumber page number to decode and fill the vector
+   * @param vectorInfo vector to be filled with column page
+   */
+  public void convertToColumnPageAndFillVector(int pageNumber, ColumnVectorInfo vectorInfo) {
+    assert pageNumber < pagesCount;
+    try {
+      chunkReader.decodeColumnPageAndFillVector(this, pageNumber, vectorInfo);
+    } catch (IOException | MemoryException e) {
+      throw new RuntimeException(e);
--- End diff --

Because those are checked exceptions; throwing them directly would require handling and declaring the same exceptions all the way up to the callers.

---
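The trade-off being discussed, as a standalone Java sketch (not CarbonData code): rethrowing a checked exception forces a throws clause onto every caller up the chain, while wrapping it in an unchecked exception keeps caller signatures unchanged and preserves the original as the cause.

```java
import java.io.IOException;

public class CheckedVsUnchecked {

  // Option 1: propagate the checked exception; every caller must declare it.
  static void decodeChecked() throws IOException {
    throw new IOException("disk read failed");
  }

  // Option 2 (what the PR does): wrap into an unchecked RuntimeException.
  static void decodeWrapped() {
    try {
      decodeChecked();
    } catch (IOException e) {
      throw new RuntimeException(e);  // original exception kept as the cause
    }
  }

  public static void main(String[] args) {
    try {
      decodeWrapped();  // no throws clause needed here
    } catch (RuntimeException e) {
      System.out.println("cause: " + e.getCause());
    }
  }
}
```

---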
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228039285

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/VariableLengthDimensionColumnPage.java ---
@@ -54,10 +75,15 @@ public VariableLengthDimensionColumnPage(byte[] dataChunks, int[] invertedIndex,
     }
     dataChunkStore = DimensionChunkStoreFactory.INSTANCE
         .getDimensionChunkStore(0, isExplicitSorted, numberOfRows, totalSize, dimStoreType,
-            dictionary);
-    dataChunkStore.putArray(invertedIndex, invertedIndexReverse, dataChunks);
+            dictionary, vectorInfo != null);
+    if (vectorInfo != null) {
+      dataChunkStore.fillVector(invertedIndex, invertedIndexReverse, dataChunks, vectorInfo);
+    } else {
+      dataChunkStore.putArray(invertedIndex, invertedIndexReverse, dataChunks);
+    }
   }
+
--- End diff --

ok

---
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228039614

--- Diff: core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java ---
@@ -1845,6 +1845,18 @@
   public static final int CARBON_MINMAX_ALLOWED_BYTE_COUNT_MIN = 10;
 
   public static final int CARBON_MINMAX_ALLOWED_BYTE_COUNT_MAX = 1000;
 
+  /**
+   * When enabled complete row filters will be handled by carbon in case of vector.
+   * If it is disabled then only page level pruning will be done by carbon and row level filtering
+   * will be done by spark for vector.
+   * There is no change in flow for non-vector based queries.
--- End diff --

We will make it false by default in another pending PR. Since this PR is focused only on full scan, many tests fail when it is false; that is why it defaults to true.

---