Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228039991 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/AbstractNonDictionaryVectorFiller.java --- @@ -0,0 +1,278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.chunk.store.impl.safe; + +import java.nio.ByteBuffer; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.DataTypeUtil; + +public abstract class AbstractNonDictionaryVectorFiller { --- End diff -- ok --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228040088 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/AbstractNonDictionaryVectorFiller.java --- @@ -0,0 +1,278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.datastore.chunk.store.impl.safe; + +import java.nio.ByteBuffer; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.DataTypeUtil; + +public abstract class AbstractNonDictionaryVectorFiller { + + protected int lengthSize; + protected int numberOfRows; + + public AbstractNonDictionaryVectorFiller(int lengthSize, int numberOfRows) { + this.lengthSize = lengthSize; + this.numberOfRows = numberOfRows; + } + + public abstract void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer); + + public int getLengthFromBuffer(ByteBuffer buffer) { + return buffer.getShort(); + } +} + +class NonDictionaryVectorFillerFactory { + + public static AbstractNonDictionaryVectorFiller getVectorFiller(DataType type, int lengthSize, + int numberOfRows) { + if (type == DataTypes.STRING) { + return new StringVectorFiller(lengthSize, numberOfRows); + } else if (type == DataTypes.VARCHAR) { + return new LongStringVectorFiller(lengthSize, numberOfRows); + } else if (type == DataTypes.TIMESTAMP) { + return new TimeStampVectorFiller(lengthSize, numberOfRows); + } else if (type == DataTypes.BOOLEAN) { + return new BooleanVectorFiller(lengthSize, numberOfRows); + } else if (type == DataTypes.SHORT) { + return new ShortVectorFiller(lengthSize, numberOfRows); + } else if (type == DataTypes.INT) { + return new IntVectorFiller(lengthSize, numberOfRows); + } else if (type == DataTypes.LONG) { + return new LongVectorFiller(lengthSize, numberOfRows); + } else { + throw new UnsupportedOperationException("Not supported datatype : " + type); + } + + } + +} + +class StringVectorFiller extends AbstractNonDictionaryVectorFiller { + + public 
StringVectorFiller(int lengthSize, int numberOfRows) { + super(lengthSize, numberOfRows); + } + + @Override + public void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer) { + // start position will be used to store the current data position + int startOffset = 0; + // as first position will be start from length of bytes as data is stored first in the memory + // block we need to skip first two bytes this is because first two bytes will be length of the + // data which we have to skip + int currentOffset = lengthSize; + ByteUtil.UnsafeComparer comparator = ByteUtil.UnsafeComparer.INSTANCE; + for (int i = 0; i < numberOfRows - 1; i++) { + buffer.position(startOffset); + startOffset += getLengthFromBuffer(buffer) + lengthSize; + int length = startOffset - (currentOffset); + if (comparator.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0, + CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, currentOffset, length)) { + vector.putNull(i); + } else { + vector.putByteArray(i, currentOffset, length, data); + } + currentOffset = startOffset + lengthSize; + } + // Handle last row + int length = (data.length - currentOffset); + if (comparator.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0, + CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, currentOffset, length)) { + vector.putNull(numberOfRows - 1); + } else { + vector.putByteArray(numberOfRows - 1, currentOffset, length, data); + } + } +} + +class LongStringVectorFiller extends StringVectorFiller { --- End diff -- Yes, all these are non public classes, only used in this class --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228040196 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java --- @@ -193,6 +193,30 @@ public void convertValue(ColumnPageValueConverter codec) { } } + @Override public byte[] getBytePage() { --- End diff -- ok --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228040371 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoderMeta.java --- @@ -49,6 +49,8 @@ // Make it protected for RLEEncoderMeta protected String compressorName; + private transient boolean fillCompleteVector; --- End diff -- ok --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228041838 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java --- @@ -176,7 +179,7 @@ private static ColumnPage getDecimalColumnPage(TableSpec.ColumnSpec columnSpec, rowOffset.putInt(counter, offset); VarLengthColumnPageBase page; - if (unsafe) { + if (unsafe && !meta.isFillCompleteVector()) { --- End diff -- ok --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228041948 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java --- @@ -176,7 +179,7 @@ private static ColumnPage getDecimalColumnPage(TableSpec.ColumnSpec columnSpec, rowOffset.putInt(counter, offset); VarLengthColumnPageBase page; - if (unsafe) { + if (unsafe && !meta.isFillCompleteVector()) { --- End diff -- ok --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228042373 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodingFactory.java --- @@ -66,6 +66,14 @@ public abstract ColumnPageEncoder createEncoder(TableSpec.ColumnSpec columnSpec, */ public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer> encoderMetas, String compressor) throws IOException { + return createDecoder(encodings, encoderMetas, compressor, false); + } + + /** + * Return new decoder based on encoder metadata read from file --- End diff -- added comment --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228042421 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodingFactory.java --- @@ -66,6 +66,14 @@ public abstract ColumnPageEncoder createEncoder(TableSpec.ColumnSpec columnSpec, */ public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer> encoderMetas, String compressor) throws IOException { + return createDecoder(encodings, encoderMetas, compressor, false); + } + + /** + * Return new decoder based on encoder metadata read from file --- End diff -- added comment --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228047744 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java --- @@ -29,6 +31,12 @@ */ ColumnPage decode(byte[] input, int offset, int length) throws MemoryException, IOException; + /** + * Apply decoding algorithm on input byte array and fill the vector here. + */ + void decodeAndFillVector(byte[] input, int offset, int length, ColumnVectorInfo vectorInfo, + BitSet nullBits, boolean isLVEncoded) throws MemoryException, IOException; --- End diff -- Yes, this method was added to mirror the old `decode` method. It was added as part of the local dictionary work and is getting refactored as part of Vishal's store method. @kumarvishal09, please remove `isLVEncoded` from the decode method; in the case of local dictionary it should align with one of the data types used in the column page. --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228047825 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveIntegralCodec.java --- @@ -248,6 +269,143 @@ public double decodeDouble(float value) { public double decodeDouble(double value) { throw new RuntimeException("internal error: " + debugInfo()); } + + @Override + public void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorInfo) { + CarbonColumnVector vector = vectorInfo.vector; + BitSet nullBits = columnPage.getNullBits(); + DataType vectorDataType = vector.getType(); + DataType pageDataType = columnPage.getDataType(); + int pageSize = columnPage.getPageSize(); + BitSet deletedRows = vectorInfo.deletedRows; + fillVector(columnPage, vector, vectorDataType, pageDataType, pageSize, vectorInfo); + if (deletedRows == null || deletedRows.isEmpty()) { + for (int i = nullBits.nextSetBit(0); i >= 0; i = nullBits.nextSetBit(i + 1)) { + vector.putNull(i); + } + } + } + + private void fillVector(ColumnPage columnPage, CarbonColumnVector vector, + DataType vectorDataType, DataType pageDataType, int pageSize, ColumnVectorInfo vectorInfo) { + if (pageDataType == DataTypes.BOOLEAN || pageDataType == DataTypes.BYTE) { + byte[] byteData = columnPage.getBytePage(); + if (vectorDataType == DataTypes.SHORT) { + for (int i = 0; i < pageSize; i++) { + vector.putShort(i, (short) byteData[i]); + } + } else if (vectorDataType == DataTypes.INT) { + for (int i = 0; i < pageSize; i++) { + vector.putInt(i, (int) byteData[i]); + } + } else if (vectorDataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, byteData[i]); + } + } else if (vectorDataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, byteData[i] * 1000); + } + } else if (vectorDataType == DataTypes.BOOLEAN) { + vector.putBytes(0, pageSize, byteData, 0); + --- End diff -- ok --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228048117 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java --- @@ -198,4 +219,48 @@ void fillColumnVectorDetails(CarbonColumnarBatch columnarBatch, int rowCounter, } } + private void collectResultInColumnarBatchDirect(BlockletScannedResult scannedResult, --- End diff -- ok --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228049239 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java --- @@ -478,6 +478,17 @@ private BlockExecutionInfo getBlockExecutionInfoForBlock(QueryModel queryModel, } else { blockExecutionInfo.setPrefetchBlocklet(queryModel.isPreFetchData()); } + // In case of fg datamap it should not go to direct fill. + boolean fgDataMapPathPresent = false; + for (TableBlockInfo blockInfo : queryModel.getTableBlockInfos()) { + fgDataMapPathPresent = blockInfo.getDataMapWriterPath() != null; + if (fgDataMapPathPresent) { + break; --- End diff -- ok --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228050191 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/model/QueryModel.java --- @@ -124,6 +124,11 @@ private boolean preFetchData = true; + /** + * It fills the vector directly from decoded column page with out any staging and conversions --- End diff -- ok, added comment --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2819#discussion_r228050305 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/result/BlockletScannedResult.java --- @@ -72,6 +72,11 @@ */ private int[] pageFilteredRowCount; + /** + * Filtered pages to be decoded and loaded to vector. + */ + private int[] pagesFiltered; --- End diff -- ok --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2819 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1006/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2819 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1220/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2819 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1007/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2819 Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9272/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2819 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1021/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2819 Build Failed with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9286/ --- |
Free forum by Nabble | Edit this page |