Apache CarbonData Dev Mailing List archive › Apache CarbonData JIRA issues

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

Classic

List

126 messages Options

Options

1 ... 3 4 5 6 7

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228039991

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/AbstractNonDictionaryVectorFiller.java ---
@@ -0,0 +1,278 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.datastore.chunk.store.impl.safe;
+
+import java.nio.ByteBuffer;
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
+import org.apache.carbondata.core.util.ByteUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
+
+public abstract class AbstractNonDictionaryVectorFiller {
--- End diff --

ok

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228040088

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/AbstractNonDictionaryVectorFiller.java ---
@@ -0,0 +1,278 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.datastore.chunk.store.impl.safe;
+
+import java.nio.ByteBuffer;
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
+import org.apache.carbondata.core.util.ByteUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
+
+public abstract class AbstractNonDictionaryVectorFiller {
+
+ protected int lengthSize;
+ protected int numberOfRows;
+
+ public AbstractNonDictionaryVectorFiller(int lengthSize, int numberOfRows) {
+ this.lengthSize = lengthSize;
+ this.numberOfRows = numberOfRows;
+ }
+
+ public abstract void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer);
+
+ public int getLengthFromBuffer(ByteBuffer buffer) {
+ return buffer.getShort();
+ }
+}
+
+class NonDictionaryVectorFillerFactory {
+
+ public static AbstractNonDictionaryVectorFiller getVectorFiller(DataType type, int lengthSize,
+ int numberOfRows) {
+ if (type == DataTypes.STRING) {
+ return new StringVectorFiller(lengthSize, numberOfRows);
+ } else if (type == DataTypes.VARCHAR) {
+ return new LongStringVectorFiller(lengthSize, numberOfRows);
+ } else if (type == DataTypes.TIMESTAMP) {
+ return new TimeStampVectorFiller(lengthSize, numberOfRows);
+ } else if (type == DataTypes.BOOLEAN) {
+ return new BooleanVectorFiller(lengthSize, numberOfRows);
+ } else if (type == DataTypes.SHORT) {
+ return new ShortVectorFiller(lengthSize, numberOfRows);
+ } else if (type == DataTypes.INT) {
+ return new IntVectorFiller(lengthSize, numberOfRows);
+ } else if (type == DataTypes.LONG) {
+ return new LongVectorFiller(lengthSize, numberOfRows);
+ } else {
+ throw new UnsupportedOperationException("Not supported datatype : " + type);
+ }
+
+ }
+
+}
+
+class StringVectorFiller extends AbstractNonDictionaryVectorFiller {
+
+ public StringVectorFiller(int lengthSize, int numberOfRows) {
+ super(lengthSize, numberOfRows);
+ }
+
+ @Override
+ public void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer) {
+ // start position will be used to store the current data position
+ int startOffset = 0;
+ // as first position will be start from length of bytes as data is stored first in the memory
+ // block we need to skip first two bytes this is because first two bytes will be length of the
+ // data which we have to skip
+ int currentOffset = lengthSize;
+ ByteUtil.UnsafeComparer comparator = ByteUtil.UnsafeComparer.INSTANCE;
+ for (int i = 0; i < numberOfRows - 1; i++) {
+ buffer.position(startOffset);
+ startOffset += getLengthFromBuffer(buffer) + lengthSize;
+ int length = startOffset - (currentOffset);
+ if (comparator.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0,
+ CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, currentOffset, length)) {
+ vector.putNull(i);
+ } else {
+ vector.putByteArray(i, currentOffset, length, data);
+ }
+ currentOffset = startOffset + lengthSize;
+ }
+ // Handle last row
+ int length = (data.length - currentOffset);
+ if (comparator.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0,
+ CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, currentOffset, length)) {
+ vector.putNull(numberOfRows - 1);
+ } else {
+ vector.putByteArray(numberOfRows - 1, currentOffset, length, data);
+ }
+ }
+}
+
+class LongStringVectorFiller extends StringVectorFiller {
--- End diff --

Yes, all of these are non-public classes, used only in this class.

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228040196

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java ---
@@ -193,6 +193,30 @@ public void convertValue(ColumnPageValueConverter codec) {
}
}

+ @Override public byte[] getBytePage() {
--- End diff --

ok

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228040371

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoderMeta.java ---
@@ -49,6 +49,8 @@
// Make it protected for RLEEncoderMeta
protected String compressorName;

+ private transient boolean fillCompleteVector;
--- End diff --

ok

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228041838

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java ---
@@ -176,7 +179,7 @@ private static ColumnPage getDecimalColumnPage(TableSpec.ColumnSpec columnSpec,
rowOffset.putInt(counter, offset);

VarLengthColumnPageBase page;
- if (unsafe) {
+ if (unsafe && !meta.isFillCompleteVector()) {
--- End diff --

ok

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228041948

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java ---
@@ -176,7 +179,7 @@ private static ColumnPage getDecimalColumnPage(TableSpec.ColumnSpec columnSpec,
rowOffset.putInt(counter, offset);

VarLengthColumnPageBase page;
- if (unsafe) {
+ if (unsafe && !meta.isFillCompleteVector()) {
--- End diff --

ok

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228042373

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodingFactory.java ---
@@ -66,6 +66,14 @@ public abstract ColumnPageEncoder createEncoder(TableSpec.ColumnSpec columnSpec,
*/
public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer> encoderMetas,
String compressor) throws IOException {
+ return createDecoder(encodings, encoderMetas, compressor, false);
+ }
+
+ /**
+ * Return new decoder based on encoder metadata read from file
--- End diff --

added comment

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228042421

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodingFactory.java ---
@@ -66,6 +66,14 @@ public abstract ColumnPageEncoder createEncoder(TableSpec.ColumnSpec columnSpec,
*/
public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer> encoderMetas,
String compressor) throws IOException {
+ return createDecoder(encodings, encoderMetas, compressor, false);
+ }
+
+ /**
+ * Return new decoder based on encoder metadata read from file
--- End diff --

added comment

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228047744

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java ---
@@ -29,6 +31,12 @@
*/
ColumnPage decode(byte[] input, int offset, int length) throws MemoryException, IOException;

+ /**
+ * Apply decoding algorithm on input byte array and fill the vector here.
+ */
+ void decodeAndFillVector(byte[] input, int offset, int length, ColumnVectorInfo vectorInfo,
+ BitSet nullBits, boolean isLVEncoded) throws MemoryException, IOException;
--- End diff --

Yes, this method was added following the old `decode` method. It was added as part of the local dictionary work, and it is being refactored as part of Vishal's store method.
@kumarvishal09 please remove `isLVEncoded` from the decode method. It should instead align with one of the datatypes used in the column page in the case of local dictionary.

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228047825

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveIntegralCodec.java ---
@@ -248,6 +269,143 @@ public double decodeDouble(float value) {
public double decodeDouble(double value) {
throw new RuntimeException("internal error: " + debugInfo());
}
+
+ @Override
+ public void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorInfo) {
+ CarbonColumnVector vector = vectorInfo.vector;
+ BitSet nullBits = columnPage.getNullBits();
+ DataType vectorDataType = vector.getType();
+ DataType pageDataType = columnPage.getDataType();
+ int pageSize = columnPage.getPageSize();
+ BitSet deletedRows = vectorInfo.deletedRows;
+ fillVector(columnPage, vector, vectorDataType, pageDataType, pageSize, vectorInfo);
+ if (deletedRows == null || deletedRows.isEmpty()) {
+ for (int i = nullBits.nextSetBit(0); i >= 0; i = nullBits.nextSetBit(i + 1)) {
+ vector.putNull(i);
+ }
+ }
+ }
+
+ private void fillVector(ColumnPage columnPage, CarbonColumnVector vector,
+ DataType vectorDataType, DataType pageDataType, int pageSize, ColumnVectorInfo vectorInfo) {
+ if (pageDataType == DataTypes.BOOLEAN || pageDataType == DataTypes.BYTE) {
+ byte[] byteData = columnPage.getBytePage();
+ if (vectorDataType == DataTypes.SHORT) {
+ for (int i = 0; i < pageSize; i++) {
+ vector.putShort(i, (short) byteData[i]);
+ }
+ } else if (vectorDataType == DataTypes.INT) {
+ for (int i = 0; i < pageSize; i++) {
+ vector.putInt(i, (int) byteData[i]);
+ }
+ } else if (vectorDataType == DataTypes.LONG) {
+ for (int i = 0; i < pageSize; i++) {
+ vector.putLong(i, byteData[i]);
+ }
+ } else if (vectorDataType == DataTypes.TIMESTAMP) {
+ for (int i = 0; i < pageSize; i++) {
+ vector.putLong(i, byteData[i] * 1000);
+ }
+ } else if (vectorDataType == DataTypes.BOOLEAN) {
+ vector.putBytes(0, pageSize, byteData, 0);
+
--- End diff --

ok

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228048117

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java ---
@@ -198,4 +219,48 @@ void fillColumnVectorDetails(CarbonColumnarBatch columnarBatch, int rowCounter,
}
}

+ private void collectResultInColumnarBatchDirect(BlockletScannedResult scannedResult,
--- End diff --

ok

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228049239

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java ---
@@ -478,6 +478,17 @@ private BlockExecutionInfo getBlockExecutionInfoForBlock(QueryModel queryModel,
} else {
blockExecutionInfo.setPrefetchBlocklet(queryModel.isPreFetchData());
}
+ // In case of fg datamap it should not go to direct fill.
+ boolean fgDataMapPathPresent = false;
+ for (TableBlockInfo blockInfo : queryModel.getTableBlockInfos()) {
+ fgDataMapPathPresent = blockInfo.getDataMapWriterPath() != null;
+ if (fgDataMapPathPresent) {
+ break;
--- End diff --

ok

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228050191

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/model/QueryModel.java ---
@@ -124,6 +124,11 @@

private boolean preFetchData = true;

+ /**
+ * It fills the vector directly from decoded column page with out any staging and conversions
--- End diff --

ok, added comment

---

[GitHub] carbondata pull request #2819: [CARBONDATA-3012] Added support for full scan...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2819#discussion_r228050305

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/result/BlockletScannedResult.java ---
@@ -72,6 +72,11 @@
*/
private int[] pageFilteredRowCount;

+ /**
+ * Filtered pages to be decoded and loaded to vector.
+ */
+ private int[] pagesFiltered;
--- End diff --

ok

---

[GitHub] carbondata issue #2819: [CARBONDATA-3012] Added support for full scan querie...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2819

Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1006/

---

[GitHub] carbondata issue #2819: [CARBONDATA-3012] Added support for full scan querie...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2819

Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1220/

---

[GitHub] carbondata issue #2819: [CARBONDATA-3012] Added support for full scan querie...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2819

Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1007/

---

[GitHub] carbondata issue #2819: [CARBONDATA-3012] Added support for full scan querie...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2819

Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9272/

---

[GitHub] carbondata issue #2819: [CARBONDATA-3012] Added support for full scan querie...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2819

Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1021/

---

[GitHub] carbondata issue #2819: [CARBONDATA-3012] Added support for full scan querie...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2819

Build Failed with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9286/

---

1 ... 3 4 5 6 7