Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1265 SDV Build Failed, please check CI http://144.76.159.231:8080/job/ApacheSDVTests/543/
Github user jackylk commented on the issue:
https://github.com/apache/carbondata/pull/1265 retest this please
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1265 SDV Build Failed, please check CI http://144.76.159.231:8080/job/ApacheSDVTests/544/
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1265#discussion_r137449599

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java ---
@@ -366,7 +364,7 @@ private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnD
     BitSet bitSet = new BitSet(numerOfRows);
     byte[][] filterValues = this.filterRangeValues;
     // binary search can only be applied if column is sorted
-    if (isNaturalSorted) {
+    if (isNaturalSorted && !dimensionColumnDataChunk.isNoDicitionaryColumn()) {
--- End diff --

Why is this check required? Even no-dictionary columns can be sorted, right?
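For context on the check being questioned here: binary search over a column page is valid only when the page is stored in sorted order; on an unsorted page every row must be compared. A minimal, self-contained sketch of the two paths for a greater-than filter (the names `RangeFilterSketch`, `greaterThan`, and `compare` are illustrative, not the CarbonData API):

    import java.util.BitSet;

    class RangeFilterSketch {
      // Set a bit for every row whose value is greater than filterValue.
      static BitSet greaterThan(byte[][] rows, byte[] filterValue, boolean isSorted) {
        BitSet bitSet = new BitSet(rows.length);
        if (isSorted) {
          // binary search for the first row > filterValue, then set the whole tail
          int lo = 0, hi = rows.length;
          while (lo < hi) {
            int mid = (lo + hi) >>> 1;
            if (compare(rows[mid], filterValue) <= 0) lo = mid + 1;
            else hi = mid;
          }
          bitSet.set(lo, rows.length);
        } else {
          // unsorted page: every row must be compared individually
          for (int i = 0; i < rows.length; i++) {
            if (compare(rows[i], filterValue) > 0) bitSet.set(i);
          }
        }
        return bitSet;
      }

      // unsigned lexicographic byte comparison
      static int compare(byte[] a, byte[] b) {
        int len = Math.min(a.length, b.length);
        for (int i = 0; i < len; i++) {
          int diff = (a[i] & 0xFF) - (b[i] & 0xFF);
          if (diff != 0) return diff;
        }
        return a.length - b.length;
      }

      public static void main(String[] args) {
        byte[][] rows = { {1}, {2}, {3}, {4} };
        System.out.println(greaterThan(rows, new byte[]{2}, true));   // {2, 3}
        System.out.println(greaterThan(rows, new byte[]{2}, false));  // {2, 3}
      }
    }

jackylk's reply further down explains why the extra condition is needed: with this PR, no-dictionary pages use adaptive encoding and are no longer written in sorted order.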
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1265#discussion_r137449915

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java ---
@@ -262,10 +259,7 @@ private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataCh
       int numerOfRows) {
     byte[] defaultValue = null;
     if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
-      DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory
-          .getDirectDictionaryGenerator(
-              dimColEvaluatorInfoList.get(0).getDimension().getDataType());
-      int key = directDictionaryGenerator.generateDirectSurrogateKey(null) + 1;
+      int key = 0;
--- End diff --

I think it is better to get the null key from the respective interface instead of hard-coding it; then if the null value key changes in the future, there is no effect here.
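A minimal sketch of this suggestion, reusing the `DirectDictionaryKeyGeneratorFactory` calls from the lines the diff removes (the wrapper class `NullKeySketch` is illustrative only):

    import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
    import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
    import org.apache.carbondata.core.metadata.datatype.DataType;

    class NullKeySketch {
      // Ask the generator for the null surrogate instead of hard-coding it,
      // so this call site stays correct even if the null key changes later.
      static int nullSurrogateKey(DataType dataType) {
        DirectDictionaryGenerator generator =
            DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(dataType);
        return generator.generateDirectSurrogateKey(null) + 1;
      }
    }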
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1265#discussion_r137449951

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java ---
@@ -367,7 +360,7 @@ private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk dimensionColumnD
     BitSet bitSet = new BitSet(numerOfRows);
     byte[][] filterValues = this.filterRangeValues;
     // binary search can only be applied if column is sorted
-    if (isNaturalSorted) {
+    if (isNaturalSorted && !dimensionColumnDataChunk.isNoDicitionaryColumn()) {
--- End diff --

Why is this check required?
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1265#discussion_r137450241

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/partition/EqualToFilterImpl.java ---
@@ -50,7 +50,7 @@ public EqualToFilterImpl(EqualToExpression equalTo, PartitionInfo partitionInfo)
         literal.getLiteralExpValue().toString(),
         partitionInfo.getColumnSchemaList().get(0).getDataType());
     if (PartitionType.RANGE == partitionInfo.getPartitionType() && value instanceof String) {
-      value = ByteUtil.toBytes((String)value);
+      value = ByteUtil.toBytesForPlainValue((String)value);
--- End diff --

The old overloaded method name seems better than this one. Since it is a utility method, it is better to keep it as an overload.
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1265#discussion_r137490368

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java ---
@@ -17,46 +17,183 @@
 package org.apache.carbondata.core.datastore.chunk.store;

+import java.nio.ByteBuffer;
+import java.util.BitSet;
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datastore.ColumnType;
+import org.apache.carbondata.core.datastore.TableSpec;
 import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
+import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
+import org.apache.carbondata.core.metadata.datatype.DataType;
 import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo;
+import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
 import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
+import org.apache.carbondata.core.util.ByteUtil;

+/**
+ * ColumnPage wrapper for dimension column reader
+ */
 public class ColumnPageWrapper implements DimensionColumnDataChunk {

+  private TableSpec.ColumnSpec columnSpec;
   private ColumnPage columnPage;
+  private int columnValueSize;

-  public ColumnPageWrapper(ColumnPage columnPage) {
+  public ColumnPageWrapper(ColumnPage columnPage, int columnValueSize) {
     this.columnPage = columnPage;
+    this.columnValueSize = columnValueSize;
+    this.columnSpec = columnPage.getColumnSpec();
   }

   @Override
   public int fillChunkData(byte[] data, int offset, int columnIndex,
       KeyStructureInfo restructuringInfo) {
-    throw new UnsupportedOperationException("internal error");
+    int surrogate = columnPage.getInt(columnIndex);
+    ByteBuffer buffer = ByteBuffer.wrap(data);
+    buffer.putInt(offset, surrogate);
+    return columnValueSize;
   }

   @Override
   public int fillConvertedChunkData(int rowId, int columnIndex, int[] row,
       KeyStructureInfo restructuringInfo) {
-    throw new UnsupportedOperationException("internal error");
+    row[columnIndex] = columnPage.getInt(rowId);
+    return columnIndex + 1;
   }

   @Override
   public int fillConvertedChunkData(ColumnVectorInfo[] vectorInfo, int column,
       KeyStructureInfo restructuringInfo) {
-    throw new UnsupportedOperationException("internal error");
+    // fill the vector with data in column page
+    ColumnVectorInfo columnVectorInfo = vectorInfo[column];
+    CarbonColumnVector vector = columnVectorInfo.vector;
+    fillData(null, columnVectorInfo, vector);
+    return column + 1;
   }

   @Override
   public int fillConvertedChunkData(int[] rowMapping, ColumnVectorInfo[] vectorInfo, int column,
       KeyStructureInfo restructuringInfo) {
-    throw new UnsupportedOperationException("internal error");
+    ColumnVectorInfo columnVectorInfo = vectorInfo[column];
+    CarbonColumnVector vector = columnVectorInfo.vector;
+    fillData(rowMapping, columnVectorInfo, vector);
+    return column + 1;
+  }
+
+  /**
+   * Copy columnar data from internal column page to `vector`, row information described by
+   * `columnVectorInfo`
+   */
+  private void fillData(int[] rowMapping, ColumnVectorInfo columnVectorInfo,
--- End diff --

Since all the code is handled here, I think we can remove `FixedLengthDimensionDataChunk` and `VariableLengthDimensionDataChunk` and the related class hierarchy, right?
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1265#discussion_r137490913

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java --- (same hunk as quoted above) --- End diff --

Also, the null value for dictionary is 1, not 0.
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1265#discussion_r137805419

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java --- (same hunk as quoted above) --- End diff --

In this PR, plain values (no dictionary) use adaptive encoding, so the column is not sorted. I think it will make the store size smaller, but some filter queries will be slower. Any suggestion?
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1265#discussion_r137806977

--- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/partition/EqualToFilterImpl.java --- (same hunk as quoted above) --- End diff --

`ByteUtil.toBytes` does `val = val ^ Integer.MIN_VALUE;`, so I think it cannot be used as a normal utility function to convert an int to byte[4].
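The XOR is the crux: flipping the sign bit makes signed integers order correctly under unsigned lexicographic byte comparison, and that is exactly what disqualifies the method as a plain int-to-byte[4] utility. A standalone sketch (not CarbonData's `ByteUtil`; relies on `ByteBuffer`'s big-endian default):

    import java.nio.ByteBuffer;

    class ByteOrderSketch {
      // Plain big-endian conversion: a generic int-to-byte[4] utility, but a
      // negative int (sign bit set) compares greater than a positive one
      // under unsigned byte order.
      static byte[] toBytesPlain(int val) {
        return ByteBuffer.allocate(4).putInt(val).array();
      }

      // Sign-flipped conversion: XOR with Integer.MIN_VALUE flips the sign bit,
      // so unsigned byte comparison matches signed int ordering.
      static byte[] toBytesComparable(int val) {
        return ByteBuffer.allocate(4).putInt(val ^ Integer.MIN_VALUE).array();
      }

      public static void main(String[] args) {
        // plain: -1 -> FF..., 1 -> 00... (wrong order under unsigned comparison)
        System.out.printf("plain   -1: %02X, 1: %02X%n",
            toBytesPlain(-1)[0], toBytesPlain(1)[0]);
        // flipped: -1 -> 7F..., 1 -> 80... (correct order)
        System.out.printf("flipped -1: %02X, 1: %02X%n",
            toBytesComparable(-1)[0], toBytesComparable(1)[0]);
      }
    }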
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1265#discussion_r137809952

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- (same hunk as quoted above) --- End diff --

For dictionary columns it still uses `IndexStorageCodec` to encode and `decodeDimensionLegacy` to decode, so `FixedLengthDimensionDataChunk` is still required. `VariableLengthDimensionDataChunk` is also required for backward compatibility, to read old data files.
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1265 SDV Build Failed, please check CI http://144.76.159.231:8080/job/ApacheSDVTests/627/
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1265 Build Failed with Spark 2.1.0, please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/24/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1265 SDV Build Failed, please check CI http://144.76.159.231:8080/job/ApacheSDVTests/1205/
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1265 Build Success with Spark 2.1.0, please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/574/
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1265 Build Failed with Spark 2.2.1, please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/3078/
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1265 Build Success with Spark 2.1.0, please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/4322/
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1265 Build Failed with Spark 2.2.1, please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/4000/
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1265 Build Success with Spark 2.1.0, please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/5210/