Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2863#discussion_r233337530 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPageValueConverter.java --- @@ -37,5 +40,6 @@ double decodeDouble(long value); double decodeDouble(float value); double decodeDouble(double value); - void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorInfo); + void decodeAndFillVector(byte[] pageData, ColumnVectorInfo vectorInfo, BitSet nullBits, + DataType pageDataType, int pageSize); --- End diff -- Can you provide a comment for this function? --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2863#discussion_r233338236 --- Diff: core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java --- @@ -2006,6 +2006,12 @@ private CarbonCommonConstants() { */ public static final String CARBON_WRITTEN_BY_APPNAME = "carbon.writtenby.app.name"; + /** + * When more global dictionary columns are there then there is issue in generating codegen to them --- End diff -- Is it only valid for tables with a global dictionary, or for normal tables as well? --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2863#discussion_r233339949 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java --- @@ -224,130 +238,134 @@ public void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorIn } } - private void fillVector(ColumnPage columnPage, CarbonColumnVector vector, - DataType vectorDataType, DataType pageDataType, int pageSize, ColumnVectorInfo vectorInfo) { + private void fillVector(byte[] pageData, CarbonColumnVector vector, DataType vectorDataType, + DataType pageDataType, int pageSize, ColumnVectorInfo vectorInfo, BitSet nullBits) { + int rowId = 0; if (pageDataType == DataTypes.BOOLEAN || pageDataType == DataTypes.BYTE) { - byte[] byteData = columnPage.getBytePage(); if (vectorDataType == DataTypes.SHORT) { for (int i = 0; i < pageSize; i++) { - vector.putShort(i, (short) byteData[i]); + vector.putShort(i, (short) pageData[i]); } } else if (vectorDataType == DataTypes.INT) { for (int i = 0; i < pageSize; i++) { - vector.putInt(i, (int) byteData[i]); + vector.putInt(i, (int) pageData[i]); } } else if (vectorDataType == DataTypes.LONG) { for (int i = 0; i < pageSize; i++) { - vector.putLong(i, byteData[i]); + vector.putLong(i, pageData[i]); } } else if (vectorDataType == DataTypes.TIMESTAMP) { for (int i = 0; i < pageSize; i++) { - vector.putLong(i, (long) byteData[i] * 1000); + vector.putLong(i, (long) pageData[i] * 1000); } } else if (vectorDataType == DataTypes.BOOLEAN || vectorDataType == DataTypes.BYTE) { - vector.putBytes(0, pageSize, byteData, 0); + vector.putBytes(0, pageSize, pageData, 0); } else if (DataTypes.isDecimal(vectorDataType)) { DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; - decimalConverter.fillVector(byteData, pageSize, vectorInfo, columnPage.getNullBits()); + decimalConverter.fillVector(pageData, pageSize, vectorInfo, nullBits, pageDataType); } else { for (int i = 0; i < pageSize; i++) { - 
vector.putDouble(i, byteData[i]); + vector.putDouble(i, pageData[i]); } } } else if (pageDataType == DataTypes.SHORT) { - short[] shortData = columnPage.getShortPage(); + int size = pageSize * DataTypes.SHORT.getSizeInBytes(); if (vectorDataType == DataTypes.SHORT) { - vector.putShorts(0, pageSize, shortData, 0); + for (int i = 0; i < size; i += DataTypes.SHORT.getSizeInBytes()) { + vector.putShort(rowId++, (ByteUtil.toShortLittleEndian(pageData, i))); + } } else if (vectorDataType == DataTypes.INT) { - for (int i = 0; i < pageSize; i++) { - vector.putInt(i, (int) shortData[i]); + for (int i = 0; i < size; i += DataTypes.SHORT.getSizeInBytes()) { + vector.putInt(rowId++, ByteUtil.toShortLittleEndian(pageData, i)); } } else if (vectorDataType == DataTypes.LONG) { - for (int i = 0; i < pageSize; i++) { - vector.putLong(i, shortData[i]); + for (int i = 0; i < size; i += DataTypes.SHORT.getSizeInBytes()) { + vector.putLong(rowId++, ByteUtil.toShortLittleEndian(pageData, i)); } } else if (vectorDataType == DataTypes.TIMESTAMP) { - for (int i = 0; i < pageSize; i++) { - vector.putLong(i, (long) shortData[i] * 1000); + for (int i = 0; i < size; i += DataTypes.SHORT.getSizeInBytes()) { + vector.putLong(rowId++, (long) ByteUtil.toShortLittleEndian(pageData, i) * 1000); } } else if (DataTypes.isDecimal(vectorDataType)) { DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; - decimalConverter.fillVector(shortData, pageSize, vectorInfo, columnPage.getNullBits()); + decimalConverter.fillVector(pageData, pageSize, vectorInfo, nullBits, pageDataType); } else { - for (int i = 0; i < pageSize; i++) { - vector.putDouble(i, shortData[i]); + for (int i = 0; i < size; i += DataTypes.SHORT.getSizeInBytes()) { + vector.putDouble(rowId++, ByteUtil.toShortLittleEndian(pageData, i)); } } } else if (pageDataType == DataTypes.SHORT_INT) { - byte[] shortIntPage = columnPage.getShortIntPage(); if (vectorDataType == DataTypes.INT) { for (int i = 0; i < 
pageSize; i++) { - int shortInt = ByteUtil.valueOf3Bytes(shortIntPage, i * 3); + int shortInt = ByteUtil.valueOf3Bytes(pageData, i * 3); vector.putInt(i, shortInt); } } else if (vectorDataType == DataTypes.LONG) { for (int i = 0; i < pageSize; i++) { - int shortInt = ByteUtil.valueOf3Bytes(shortIntPage, i * 3); + int shortInt = ByteUtil.valueOf3Bytes(pageData, i * 3); vector.putLong(i, shortInt); } } else if (vectorDataType == DataTypes.TIMESTAMP) { for (int i = 0; i < pageSize; i++) { - int shortInt = ByteUtil.valueOf3Bytes(shortIntPage, i * 3); + int shortInt = ByteUtil.valueOf3Bytes(pageData, i * 3); vector.putLong(i, (long) shortInt * 1000); } } else if (DataTypes.isDecimal(vectorDataType)) { DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; - int[] shortIntData = ByteUtil.toIntArrayFrom3Bytes(shortIntPage, pageSize); - decimalConverter.fillVector(shortIntData, pageSize, vectorInfo, columnPage.getNullBits()); + decimalConverter.fillVector(pageData, pageSize, vectorInfo, nullBits, pageDataType); } else { for (int i = 0; i < pageSize; i++) { - int shortInt = ByteUtil.valueOf3Bytes(shortIntPage, i * 3); + int shortInt = ByteUtil.valueOf3Bytes(pageData, i * 3); vector.putDouble(i, shortInt); } } } else if (pageDataType == DataTypes.INT) { - int[] intData = columnPage.getIntPage(); + int size = pageSize * DataTypes.INT.getSizeInBytes(); if (vectorDataType == DataTypes.INT) { - vector.putInts(0, pageSize, intData, 0); + for (int i = 0; i < size; i += DataTypes.INT.getSizeInBytes()) { --- End diff -- Assign DataTypes.SHORT.getSizeInBytes() to a variable before the loop; do not call this function inside the loop. --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2863#discussion_r233340004 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java --- @@ -224,130 +238,134 @@ public void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorIn } } - private void fillVector(ColumnPage columnPage, CarbonColumnVector vector, - DataType vectorDataType, DataType pageDataType, int pageSize, ColumnVectorInfo vectorInfo) { + private void fillVector(byte[] pageData, CarbonColumnVector vector, DataType vectorDataType, + DataType pageDataType, int pageSize, ColumnVectorInfo vectorInfo, BitSet nullBits) { + int rowId = 0; if (pageDataType == DataTypes.BOOLEAN || pageDataType == DataTypes.BYTE) { - byte[] byteData = columnPage.getBytePage(); if (vectorDataType == DataTypes.SHORT) { for (int i = 0; i < pageSize; i++) { - vector.putShort(i, (short) byteData[i]); + vector.putShort(i, (short) pageData[i]); } } else if (vectorDataType == DataTypes.INT) { for (int i = 0; i < pageSize; i++) { - vector.putInt(i, (int) byteData[i]); + vector.putInt(i, (int) pageData[i]); } } else if (vectorDataType == DataTypes.LONG) { for (int i = 0; i < pageSize; i++) { - vector.putLong(i, byteData[i]); + vector.putLong(i, pageData[i]); } } else if (vectorDataType == DataTypes.TIMESTAMP) { for (int i = 0; i < pageSize; i++) { - vector.putLong(i, (long) byteData[i] * 1000); + vector.putLong(i, (long) pageData[i] * 1000); } } else if (vectorDataType == DataTypes.BOOLEAN || vectorDataType == DataTypes.BYTE) { - vector.putBytes(0, pageSize, byteData, 0); + vector.putBytes(0, pageSize, pageData, 0); } else if (DataTypes.isDecimal(vectorDataType)) { DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; - decimalConverter.fillVector(byteData, pageSize, vectorInfo, columnPage.getNullBits()); + decimalConverter.fillVector(pageData, pageSize, vectorInfo, nullBits, pageDataType); } else { for (int i = 0; i < pageSize; i++) { - 
vector.putDouble(i, byteData[i]); + vector.putDouble(i, pageData[i]); } } } else if (pageDataType == DataTypes.SHORT) { - short[] shortData = columnPage.getShortPage(); + int size = pageSize * DataTypes.SHORT.getSizeInBytes(); if (vectorDataType == DataTypes.SHORT) { - vector.putShorts(0, pageSize, shortData, 0); + for (int i = 0; i < size; i += DataTypes.SHORT.getSizeInBytes()) { + vector.putShort(rowId++, (ByteUtil.toShortLittleEndian(pageData, i))); + } } else if (vectorDataType == DataTypes.INT) { - for (int i = 0; i < pageSize; i++) { - vector.putInt(i, (int) shortData[i]); + for (int i = 0; i < size; i += DataTypes.SHORT.getSizeInBytes()) { + vector.putInt(rowId++, ByteUtil.toShortLittleEndian(pageData, i)); } } else if (vectorDataType == DataTypes.LONG) { - for (int i = 0; i < pageSize; i++) { - vector.putLong(i, shortData[i]); + for (int i = 0; i < size; i += DataTypes.SHORT.getSizeInBytes()) { + vector.putLong(rowId++, ByteUtil.toShortLittleEndian(pageData, i)); } } else if (vectorDataType == DataTypes.TIMESTAMP) { - for (int i = 0; i < pageSize; i++) { - vector.putLong(i, (long) shortData[i] * 1000); + for (int i = 0; i < size; i += DataTypes.SHORT.getSizeInBytes()) { + vector.putLong(rowId++, (long) ByteUtil.toShortLittleEndian(pageData, i) * 1000); } } else if (DataTypes.isDecimal(vectorDataType)) { DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; - decimalConverter.fillVector(shortData, pageSize, vectorInfo, columnPage.getNullBits()); + decimalConverter.fillVector(pageData, pageSize, vectorInfo, nullBits, pageDataType); } else { - for (int i = 0; i < pageSize; i++) { - vector.putDouble(i, shortData[i]); + for (int i = 0; i < size; i += DataTypes.SHORT.getSizeInBytes()) { + vector.putDouble(rowId++, ByteUtil.toShortLittleEndian(pageData, i)); } } } else if (pageDataType == DataTypes.SHORT_INT) { - byte[] shortIntPage = columnPage.getShortIntPage(); if (vectorDataType == DataTypes.INT) { for (int i = 0; i < 
pageSize; i++) { - int shortInt = ByteUtil.valueOf3Bytes(shortIntPage, i * 3); + int shortInt = ByteUtil.valueOf3Bytes(pageData, i * 3); vector.putInt(i, shortInt); } } else if (vectorDataType == DataTypes.LONG) { for (int i = 0; i < pageSize; i++) { - int shortInt = ByteUtil.valueOf3Bytes(shortIntPage, i * 3); + int shortInt = ByteUtil.valueOf3Bytes(pageData, i * 3); vector.putLong(i, shortInt); } } else if (vectorDataType == DataTypes.TIMESTAMP) { for (int i = 0; i < pageSize; i++) { - int shortInt = ByteUtil.valueOf3Bytes(shortIntPage, i * 3); + int shortInt = ByteUtil.valueOf3Bytes(pageData, i * 3); vector.putLong(i, (long) shortInt * 1000); } } else if (DataTypes.isDecimal(vectorDataType)) { DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; - int[] shortIntData = ByteUtil.toIntArrayFrom3Bytes(shortIntPage, pageSize); - decimalConverter.fillVector(shortIntData, pageSize, vectorInfo, columnPage.getNullBits()); + decimalConverter.fillVector(pageData, pageSize, vectorInfo, nullBits, pageDataType); } else { for (int i = 0; i < pageSize; i++) { - int shortInt = ByteUtil.valueOf3Bytes(shortIntPage, i * 3); + int shortInt = ByteUtil.valueOf3Bytes(pageData, i * 3); vector.putDouble(i, shortInt); } } } else if (pageDataType == DataTypes.INT) { - int[] intData = columnPage.getIntPage(); + int size = pageSize * DataTypes.INT.getSizeInBytes(); if (vectorDataType == DataTypes.INT) { - vector.putInts(0, pageSize, intData, 0); + for (int i = 0; i < size; i += DataTypes.INT.getSizeInBytes()) { + vector.putInt(rowId++, ByteUtil.toIntLittleEndian(pageData, i)); + } } else if (vectorDataType == DataTypes.LONG) { - for (int i = 0; i < pageSize; i++) { - vector.putLong(i, intData[i]); + for (int i = 0; i < size; i += DataTypes.INT.getSizeInBytes()) { --- End diff -- same as above comment --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2863#discussion_r233341050 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java --- @@ -53,6 +53,12 @@ private DataType blockDataType; + private int[] lengths; --- End diff -- Please add comments for these three newly added variables. --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1403/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9661/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1613/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1421/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1631/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9679/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1426/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1635/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9683/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1438/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1440/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1442/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1652/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9700/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2863 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1445/ --- |
Free forum by Nabble | Edit this page |