Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198861537 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -17,45 +17,102 @@ package org.apache.carbondata.core.datastore.chunk.store; +import java.nio.ByteBuffer; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.ColumnType; +import org.apache.carbondata.core.datastore.TableSpec; import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage; import org.apache.carbondata.core.datastore.page.ColumnPage; +import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.util.ByteUtil; public class ColumnPageWrapper implements DimensionColumnPage { private ColumnPage columnPage; + private TableSpec.ColumnSpec columnSpec; + private int columnValueSize = 0; + public ColumnPageWrapper(ColumnPage columnPage) { this.columnPage = columnPage; + this.columnSpec = columnPage.getColumnSpec(); } @Override public int fillRawData(int rowId, int offset, byte[] data, KeyStructureInfo restructuringInfo) { - throw new UnsupportedOperationException("internal error"); + // TODO verify the implementation. Mostly this is for dictionary. + int surrogate = columnPage.getInt(rowId); + ByteBuffer buffer = ByteBuffer.wrap(data); + buffer.putInt(offset, surrogate); + return columnValueSize; --- End diff -- No implementation is required here, as this method is used only for dictionary columns. It will go to FixedChunReader, so just `throw new UnsupportedOperationException` --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198861716 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -17,45 +17,102 @@ package org.apache.carbondata.core.datastore.chunk.store; +import java.nio.ByteBuffer; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.ColumnType; +import org.apache.carbondata.core.datastore.TableSpec; import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage; import org.apache.carbondata.core.datastore.page.ColumnPage; +import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.util.ByteUtil; public class ColumnPageWrapper implements DimensionColumnPage { private ColumnPage columnPage; + private TableSpec.ColumnSpec columnSpec; + private int columnValueSize = 0; + public ColumnPageWrapper(ColumnPage columnPage) { this.columnPage = columnPage; + this.columnSpec = columnPage.getColumnSpec(); } @Override public int fillRawData(int rowId, int offset, byte[] data, KeyStructureInfo restructuringInfo) { - throw new UnsupportedOperationException("internal error"); + // TODO verify the implementation. Mostly this is for dictionary. 
+ int surrogate = columnPage.getInt(rowId); + ByteBuffer buffer = ByteBuffer.wrap(data); + buffer.putInt(offset, surrogate); + return columnValueSize; } @Override public int fillSurrogateKey(int rowId, int chunkIndex, int[] outputSurrogateKey, KeyStructureInfo restructuringInfo) { - throw new UnsupportedOperationException("internal error"); + outputSurrogateKey[chunkIndex] = columnPage.getInt(rowId); --- End diff -- not required, remove --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198861844 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -17,45 +17,102 @@ package org.apache.carbondata.core.datastore.chunk.store; +import java.nio.ByteBuffer; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.ColumnType; +import org.apache.carbondata.core.datastore.TableSpec; import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage; import org.apache.carbondata.core.datastore.page.ColumnPage; +import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.util.ByteUtil; public class ColumnPageWrapper implements DimensionColumnPage { private ColumnPage columnPage; + private TableSpec.ColumnSpec columnSpec; + private int columnValueSize = 0; + public ColumnPageWrapper(ColumnPage columnPage) { this.columnPage = columnPage; + this.columnSpec = columnPage.getColumnSpec(); } @Override public int fillRawData(int rowId, int offset, byte[] data, KeyStructureInfo restructuringInfo) { - throw new UnsupportedOperationException("internal error"); + // TODO verify the implementation. Mostly this is for dictionary. 
+ int surrogate = columnPage.getInt(rowId); + ByteBuffer buffer = ByteBuffer.wrap(data); + buffer.putInt(offset, surrogate); + return columnValueSize; } @Override public int fillSurrogateKey(int rowId, int chunkIndex, int[] outputSurrogateKey, KeyStructureInfo restructuringInfo) { - throw new UnsupportedOperationException("internal error"); + outputSurrogateKey[chunkIndex] = columnPage.getInt(rowId); + return chunkIndex + 1; } @Override public int fillVector(ColumnVectorInfo[] vectorInfo, int chunkIndex, KeyStructureInfo restructuringInfo) { - throw new UnsupportedOperationException("internal error"); + // fill the vector with data in column page --- End diff -- no need --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198864088 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -77,14 +134,115 @@ public boolean isExplicitSorted() { return false; } - @Override - public int compareTo(int rowId, byte[] compareValue) { - throw new UnsupportedOperationException("internal error"); + @Override public int compareTo(int rowId, byte[] compareValue) { + if (columnPage.getColumnSpec().getColumnType() == ColumnType.DIRECT_DICTIONARY) { + int surrogate = columnPage.getInt(rowId); + int input = ByteBuffer.wrap(compareValue).getInt(); + return surrogate - input; + } else { + byte[] data; + if (columnPage.getDataType() == DataTypes.INT) { + data = ByteUtil.toBytes(columnPage.getInt(rowId)); + } else if (columnPage.getDataType() == DataTypes.STRING) { + data = columnPage.getBytes(rowId); + } else { + throw new RuntimeException("invalid data type for dimension: " + columnPage.getDataType()); + } + return ByteUtil.UnsafeComparer.INSTANCE + .compareTo(data, 0, data.length, compareValue, 0, compareValue.length); + } } @Override public void freeMemory() { } + private void fillData(int[] rowMapping, ColumnVectorInfo columnVectorInfo, + CarbonColumnVector vector) { + int offsetRowId = columnVectorInfo.offset; + int vectorOffset = columnVectorInfo.vectorOffset; + int maxRowId = offsetRowId + columnVectorInfo.size; + BitSet nullBitset = columnPage.getNullBits(); + switch (columnSpec.getColumnType()) { + case DIRECT_DICTIONARY: --- End diff -- No need to handle `DIRECT_DICTIONARY` and `GLOBAL_DICTIONARY` --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198864176 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -77,14 +134,115 @@ public boolean isExplicitSorted() { return false; } - @Override - public int compareTo(int rowId, byte[] compareValue) { - throw new UnsupportedOperationException("internal error"); + @Override public int compareTo(int rowId, byte[] compareValue) { + if (columnPage.getColumnSpec().getColumnType() == ColumnType.DIRECT_DICTIONARY) { + int surrogate = columnPage.getInt(rowId); + int input = ByteBuffer.wrap(compareValue).getInt(); + return surrogate - input; + } else { + byte[] data; + if (columnPage.getDataType() == DataTypes.INT) { + data = ByteUtil.toBytes(columnPage.getInt(rowId)); + } else if (columnPage.getDataType() == DataTypes.STRING) { + data = columnPage.getBytes(rowId); + } else { + throw new RuntimeException("invalid data type for dimension: " + columnPage.getDataType()); + } + return ByteUtil.UnsafeComparer.INSTANCE + .compareTo(data, 0, data.length, compareValue, 0, compareValue.length); + } } @Override public void freeMemory() { } + private void fillData(int[] rowMapping, ColumnVectorInfo columnVectorInfo, + CarbonColumnVector vector) { + int offsetRowId = columnVectorInfo.offset; + int vectorOffset = columnVectorInfo.vectorOffset; + int maxRowId = offsetRowId + columnVectorInfo.size; + BitSet nullBitset = columnPage.getNullBits(); + switch (columnSpec.getColumnType()) { + case DIRECT_DICTIONARY: + DirectDictionaryGenerator generator = columnVectorInfo.directDictionaryGenerator; + assert (generator != null); + DataType dataType = generator.getReturnType(); + for (int rowId = offsetRowId; rowId < maxRowId; rowId++) { + int currentRowId = (rowMapping == null) ? 
rowId : rowMapping[rowId]; + if (nullBitset.get(currentRowId)) { + vector.putNull(vectorOffset++); + } else { + int surrogate = columnPage.getInt(currentRowId); + Object valueFromSurrogate = generator.getValueFromSurrogate(surrogate); + if (valueFromSurrogate == null) { + vector.putNull(vectorOffset++); + } else { + if (dataType == DataTypes.INT) { + vector.putInt(vectorOffset++, (int) valueFromSurrogate); + } else { + vector.putLong(vectorOffset++, (long) valueFromSurrogate); + } + } + } + } + break; + case GLOBAL_DICTIONARY: + for (int rowId = offsetRowId; rowId < maxRowId; rowId++) { + int currentRowId = (rowMapping == null) ? rowId : rowMapping[rowId]; + if (nullBitset.get(currentRowId)) { + vector.putNull(vectorOffset++); + } else { + int data = columnPage.getInt(currentRowId); + vector.putInt(vectorOffset++, data); + } + } + break; + case PLAIN_VALUE: + for (int rowId = offsetRowId; rowId < maxRowId; rowId++) { + int currentRowId = (rowMapping == null) ? rowId : rowMapping[rowId]; + if (nullBitset.get(currentRowId)) { + vector.putNull(vectorOffset++); + } else { + if (columnSpec.getSchemaDataType() == DataTypes.STRING) { + byte[] data = columnPage.getBytes(currentRowId); + if (isNullPlainValue(data)) { + vector.putNull(vectorOffset++); + } else { + vector.putBytes(vectorOffset++, 0, data.length, data); + } + } else if (columnSpec.getSchemaDataType() == DataTypes.BOOLEAN) { + boolean data = columnPage.getBoolean(currentRowId); + vector.putBoolean(vectorOffset++, (boolean) data); + } else if (columnSpec.getSchemaDataType() == DataTypes.INT) { + // TODO have to check for other dataTypes. Only INT Specified Now. --- End diff -- remove it --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198864334 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -77,14 +134,115 @@ public boolean isExplicitSorted() { return false; } - @Override - public int compareTo(int rowId, byte[] compareValue) { - throw new UnsupportedOperationException("internal error"); + @Override public int compareTo(int rowId, byte[] compareValue) { + if (columnPage.getColumnSpec().getColumnType() == ColumnType.DIRECT_DICTIONARY) { + int surrogate = columnPage.getInt(rowId); + int input = ByteBuffer.wrap(compareValue).getInt(); + return surrogate - input; + } else { + byte[] data; + if (columnPage.getDataType() == DataTypes.INT) { + data = ByteUtil.toBytes(columnPage.getInt(rowId)); + } else if (columnPage.getDataType() == DataTypes.STRING) { + data = columnPage.getBytes(rowId); + } else { + throw new RuntimeException("invalid data type for dimension: " + columnPage.getDataType()); + } + return ByteUtil.UnsafeComparer.INSTANCE + .compareTo(data, 0, data.length, compareValue, 0, compareValue.length); + } } @Override public void freeMemory() { } + private void fillData(int[] rowMapping, ColumnVectorInfo columnVectorInfo, + CarbonColumnVector vector) { + int offsetRowId = columnVectorInfo.offset; + int vectorOffset = columnVectorInfo.vectorOffset; + int maxRowId = offsetRowId + columnVectorInfo.size; + BitSet nullBitset = columnPage.getNullBits(); + switch (columnSpec.getColumnType()) { + case DIRECT_DICTIONARY: + DirectDictionaryGenerator generator = columnVectorInfo.directDictionaryGenerator; + assert (generator != null); + DataType dataType = generator.getReturnType(); + for (int rowId = offsetRowId; rowId < maxRowId; rowId++) { + int currentRowId = (rowMapping == null) ? 
rowId : rowMapping[rowId]; + if (nullBitset.get(currentRowId)) { + vector.putNull(vectorOffset++); + } else { + int surrogate = columnPage.getInt(currentRowId); + Object valueFromSurrogate = generator.getValueFromSurrogate(surrogate); + if (valueFromSurrogate == null) { + vector.putNull(vectorOffset++); + } else { + if (dataType == DataTypes.INT) { + vector.putInt(vectorOffset++, (int) valueFromSurrogate); + } else { + vector.putLong(vectorOffset++, (long) valueFromSurrogate); + } + } + } + } + break; + case GLOBAL_DICTIONARY: + for (int rowId = offsetRowId; rowId < maxRowId; rowId++) { + int currentRowId = (rowMapping == null) ? rowId : rowMapping[rowId]; + if (nullBitset.get(currentRowId)) { + vector.putNull(vectorOffset++); + } else { + int data = columnPage.getInt(currentRowId); + vector.putInt(vectorOffset++, data); + } + } + break; + case PLAIN_VALUE: + for (int rowId = offsetRowId; rowId < maxRowId; rowId++) { + int currentRowId = (rowMapping == null) ? rowId : rowMapping[rowId]; + if (nullBitset.get(currentRowId)) { + vector.putNull(vectorOffset++); + } else { + if (columnSpec.getSchemaDataType() == DataTypes.STRING) { + byte[] data = columnPage.getBytes(currentRowId); + if (isNullPlainValue(data)) { + vector.putNull(vectorOffset++); + } else { + vector.putBytes(vectorOffset++, 0, data.length, data); + } + } else if (columnSpec.getSchemaDataType() == DataTypes.BOOLEAN) { + boolean data = columnPage.getBoolean(currentRowId); + vector.putBoolean(vectorOffset++, (boolean) data); + } else if (columnSpec.getSchemaDataType() == DataTypes.INT) { + // TODO have to check for other dataTypes. Only INT Specified Now. + int data = columnPage.getInt(currentRowId); + vector.putInt(vectorOffset++, (int) data); --- End diff -- remove all typecasts, not required --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198865395 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -77,14 +134,115 @@ public boolean isExplicitSorted() { return false; } - @Override - public int compareTo(int rowId, byte[] compareValue) { - throw new UnsupportedOperationException("internal error"); + @Override public int compareTo(int rowId, byte[] compareValue) { + if (columnPage.getColumnSpec().getColumnType() == ColumnType.DIRECT_DICTIONARY) { + int surrogate = columnPage.getInt(rowId); + int input = ByteBuffer.wrap(compareValue).getInt(); + return surrogate - input; + } else { + byte[] data; + if (columnPage.getDataType() == DataTypes.INT) { + data = ByteUtil.toBytes(columnPage.getInt(rowId)); + } else if (columnPage.getDataType() == DataTypes.STRING) { + data = columnPage.getBytes(rowId); + } else { + throw new RuntimeException("invalid data type for dimension: " + columnPage.getDataType()); + } + return ByteUtil.UnsafeComparer.INSTANCE + .compareTo(data, 0, data.length, compareValue, 0, compareValue.length); + } } @Override public void freeMemory() { } + private void fillData(int[] rowMapping, ColumnVectorInfo columnVectorInfo, + CarbonColumnVector vector) { + int offsetRowId = columnVectorInfo.offset; + int vectorOffset = columnVectorInfo.vectorOffset; + int maxRowId = offsetRowId + columnVectorInfo.size; + BitSet nullBitset = columnPage.getNullBits(); + switch (columnSpec.getColumnType()) { + case DIRECT_DICTIONARY: + DirectDictionaryGenerator generator = columnVectorInfo.directDictionaryGenerator; + assert (generator != null); + DataType dataType = generator.getReturnType(); + for (int rowId = offsetRowId; rowId < maxRowId; rowId++) { + int currentRowId = (rowMapping == null) ? 
rowId : rowMapping[rowId]; + if (nullBitset.get(currentRowId)) { + vector.putNull(vectorOffset++); + } else { + int surrogate = columnPage.getInt(currentRowId); + Object valueFromSurrogate = generator.getValueFromSurrogate(surrogate); + if (valueFromSurrogate == null) { + vector.putNull(vectorOffset++); + } else { + if (dataType == DataTypes.INT) { + vector.putInt(vectorOffset++, (int) valueFromSurrogate); + } else { + vector.putLong(vectorOffset++, (long) valueFromSurrogate); + } + } + } + } + break; + case GLOBAL_DICTIONARY: + for (int rowId = offsetRowId; rowId < maxRowId; rowId++) { + int currentRowId = (rowMapping == null) ? rowId : rowMapping[rowId]; + if (nullBitset.get(currentRowId)) { + vector.putNull(vectorOffset++); + } else { + int data = columnPage.getInt(currentRowId); + vector.putInt(vectorOffset++, data); + } + } + break; + case PLAIN_VALUE: + for (int rowId = offsetRowId; rowId < maxRowId; rowId++) { + int currentRowId = (rowMapping == null) ? rowId : rowMapping[rowId]; + if (nullBitset.get(currentRowId)) { + vector.putNull(vectorOffset++); + } else { + if (columnSpec.getSchemaDataType() == DataTypes.STRING) { + byte[] data = columnPage.getBytes(currentRowId); + if (isNullPlainValue(data)) { + vector.putNull(vectorOffset++); + } else { + vector.putBytes(vectorOffset++, 0, data.length, data); + } + } else if (columnSpec.getSchemaDataType() == DataTypes.BOOLEAN) { + boolean data = columnPage.getBoolean(currentRowId); + vector.putBoolean(vectorOffset++, (boolean) data); + } else if (columnSpec.getSchemaDataType() == DataTypes.INT) { + // TODO have to check for other dataTypes. Only INT Specified Now. 
+ int data = columnPage.getInt(currentRowId); + vector.putInt(vectorOffset++, (int) data); + } else if (columnSpec.getSchemaDataType() == DataTypes.LONG) { + long data = columnPage.getLong(currentRowId); + vector.putLong(vectorOffset++, (long) data); + } else if (columnSpec.getSchemaDataType() == DataTypes.TIMESTAMP) { + long data = columnPage.getLong(currentRowId); + vector.putLong(vectorOffset++, (long) data); + } else if (columnSpec.getSchemaDataType() == DataTypes.FLOAT) { + float data = columnPage.getFloat(currentRowId); + vector.putFloat(vectorOffset++, (float) data); + } else if (columnSpec.getSchemaDataType() == DataTypes.DOUBLE) { + double data = columnPage.getDouble(currentRowId); + vector.putDouble(vectorOffset++, (double) data); + } --- End diff -- Try to implement this for decimal as well later. Please raise a JIRA for it --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198866126 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -17,45 +17,102 @@ package org.apache.carbondata.core.datastore.chunk.store; +import java.nio.ByteBuffer; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.ColumnType; +import org.apache.carbondata.core.datastore.TableSpec; import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage; import org.apache.carbondata.core.datastore.page.ColumnPage; +import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.util.ByteUtil; public class ColumnPageWrapper implements DimensionColumnPage { private ColumnPage columnPage; + private TableSpec.ColumnSpec columnSpec; + private int columnValueSize = 0; + public ColumnPageWrapper(ColumnPage columnPage) { this.columnPage = columnPage; + this.columnSpec = columnPage.getColumnSpec(); } @Override public int fillRawData(int rowId, int offset, byte[] data, KeyStructureInfo restructuringInfo) { - throw new UnsupportedOperationException("internal error"); + // TODO verify the implementation. Mostly this is for dictionary. 
+ int surrogate = columnPage.getInt(rowId); + ByteBuffer buffer = ByteBuffer.wrap(data); + buffer.putInt(offset, surrogate); + return columnValueSize; } @Override public int fillSurrogateKey(int rowId, int chunkIndex, int[] outputSurrogateKey, KeyStructureInfo restructuringInfo) { - throw new UnsupportedOperationException("internal error"); + outputSurrogateKey[chunkIndex] = columnPage.getInt(rowId); + return chunkIndex + 1; } @Override public int fillVector(ColumnVectorInfo[] vectorInfo, int chunkIndex, KeyStructureInfo restructuringInfo) { - throw new UnsupportedOperationException("internal error"); + // fill the vector with data in column page + ColumnVectorInfo columnVectorInfo = vectorInfo[chunkIndex]; + CarbonColumnVector vector = columnVectorInfo.vector; + fillData(null, columnVectorInfo, vector); + return chunkIndex + 1; } + @Override public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int chunkIndex, KeyStructureInfo restructuringInfo) { - throw new UnsupportedOperationException("internal error"); + ColumnVectorInfo columnVectorInfo = vectorInfo[chunkIndex]; + CarbonColumnVector vector = columnVectorInfo.vector; + fillData(filteredRowId, columnVectorInfo, vector); + return chunkIndex + 1; } - @Override - public byte[] getChunkData(int rowId) { - return columnPage.getBytes(rowId); + @Override public byte[] getChunkData(int rowId) { + ColumnType columnType = columnPage.getColumnSpec().getColumnType(); + if (columnType == ColumnType.DIRECT_DICTIONARY) { --- End diff -- remove dictionary and direct dictionary --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198867991 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -77,14 +134,115 @@ public boolean isExplicitSorted() { return false; } - @Override - public int compareTo(int rowId, byte[] compareValue) { - throw new UnsupportedOperationException("internal error"); + @Override public int compareTo(int rowId, byte[] compareValue) { + if (columnPage.getColumnSpec().getColumnType() == ColumnType.DIRECT_DICTIONARY) { --- End diff -- remove dictionary --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2417 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6624/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198868873 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -77,14 +134,115 @@ public boolean isExplicitSorted() { return false; } - @Override - public int compareTo(int rowId, byte[] compareValue) { - throw new UnsupportedOperationException("internal error"); + @Override public int compareTo(int rowId, byte[] compareValue) { + if (columnPage.getColumnSpec().getColumnType() == ColumnType.DIRECT_DICTIONARY) { + int surrogate = columnPage.getInt(rowId); + int input = ByteBuffer.wrap(compareValue).getInt(); + return surrogate - input; + } else { + byte[] data; + if (columnPage.getDataType() == DataTypes.INT) { --- End diff -- First convert `compareValue` to respective datatype and compare with actual value --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198872075 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java --- @@ -405,13 +446,30 @@ public void putData(int rowId, Object value) { } else if (dataType == DataTypes.STRING || dataType == DataTypes.BYTE_ARRAY || dataType == DataTypes.VARCHAR) { - putBytes(rowId, (byte[]) value); - statsCollector.update((byte[]) value); + byte[] valueWithLength; + if (columnSpec.getColumnType() != ColumnType.PLAIN_VALUE) { + // This case is for GLOBAL_DICTIONARY and DIRECT_DICTIONARY. In this + // scenario the dataType is BYTE_ARRAY and passed bytearray should + // be saved. + putBytes(rowId, (byte[]) value); + statsCollector.update((byte[]) value); + } else { + if (dataType == DataTypes.VARCHAR) { + // Add length and then add the data. + valueWithLength = addIntLengthToByteArray((byte[]) value); + } else { + valueWithLength = addShortLengthToByteArray((byte[]) value); + } + putBytes(rowId, valueWithLength); + statsCollector.update((byte[]) valueWithLength); --- End diff -- Move down --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198872182 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java --- @@ -405,13 +446,30 @@ public void putData(int rowId, Object value) { } else if (dataType == DataTypes.STRING || dataType == DataTypes.BYTE_ARRAY || dataType == DataTypes.VARCHAR) { - putBytes(rowId, (byte[]) value); - statsCollector.update((byte[]) value); + byte[] valueWithLength; + if (columnSpec.getColumnType() != ColumnType.PLAIN_VALUE) { + // This case is for GLOBAL_DICTIONARY and DIRECT_DICTIONARY. In this + // scenario the dataType is BYTE_ARRAY and passed bytearray should + // be saved. + putBytes(rowId, (byte[]) value); + statsCollector.update((byte[]) value); + } else { + if (dataType == DataTypes.VARCHAR) { + // Add length and then add the data. + valueWithLength = addIntLengthToByteArray((byte[]) value); + } else { + valueWithLength = addShortLengthToByteArray((byte[]) value); + } + putBytes(rowId, valueWithLength); + statsCollector.update((byte[]) valueWithLength); + } } else { throw new RuntimeException("unsupported data type: " + dataType); } } + --- End diff -- remove unnecessary gaps --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198873521 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/LazyColumnPage.java --- @@ -283,16 +283,16 @@ public byte getByte(int rowId) { @Override public short getShort(int rowId) { --- End diff -- Check for float also --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198874161 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java --- @@ -38,6 +38,15 @@ import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.datatype.DecimalConverterFactory; +import static org.apache.carbondata.core.metadata.datatype.DataTypes.BOOLEAN; --- End diff -- just add `import static org.apache.carbondata.core.metadata.datatype.DataTypes.*` --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198874999 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java --- @@ -58,16 +67,36 @@ public static EncodingFactory getInstance() { @Override public ColumnPageEncoder createEncoder(TableSpec.ColumnSpec columnSpec, ColumnPage inputPage) { // TODO: add log + ColumnPageEncoder pageEncoder = null; if (columnSpec instanceof TableSpec.MeasureSpec) { return createEncoderForMeasure(inputPage); - } else { - if (newWay) { - return createEncoderForDimension((TableSpec.DimensionSpec) columnSpec, inputPage); - } else { - assert columnSpec instanceof TableSpec.DimensionSpec; + } else if (columnSpec instanceof TableSpec.DimensionSpec) { + pageEncoder = createCodecForDimension((TableSpec.DimensionSpec) columnSpec, inputPage); + if (pageEncoder == null) { return createEncoderForDimensionLegacy((TableSpec.DimensionSpec) columnSpec); } } + return pageEncoder; + } + + private ColumnPageEncoder createCodecForDimension(TableSpec.DimensionSpec columnSpec, + ColumnPage inputPage) { + switch (columnSpec.getColumnType()) { + case PLAIN_VALUE: + if ((inputPage.getDataType() == DataTypes.BYTE) || (inputPage.getDataType() + == DataTypes.SHORT) || (inputPage.getDataType() == DataTypes.INT) || ( + inputPage.getDataType() == DataTypes.LONG)) { + return selectCodecByAlgorithmForIntegral(inputPage.getStatistics()).createEncoder(null); + } else if ((inputPage.getDataType() == DataTypes.FLOAT) || (inputPage.getDataType() + == DataTypes.DOUBLE)) { + return selectCodecByAlgorithmForFloating(inputPage.getStatistics()).createEncoder(null); + } else if (inputPage.getDataType() == DataTypes.STRING) { + // TODO. Currently let string go through legacy encoding. Later will change the encoding. + return null; + } + break; + } + return null; } private ColumnPageEncoder createEncoderForDimension(TableSpec.DimensionSpec columnSpec, --- End diff -- Remove it; nobody uses it --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198877038 --- Diff: processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java --- @@ -354,6 +341,18 @@ public EncodedTablePage getEncodedTablePage() { .getColumnType()); } } + // for (int i = 0; i < dimensionPages.length; i++) { --- End diff -- remove commented code --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r198877123 --- Diff: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java --- @@ -436,10 +436,11 @@ public static boolean isFixedSizeDataType(DataType dataType) { * * @param dataInBytes data * @param actualDataType actual data type + * @param isTimeStampConversion * @return actual data after conversion */ public static Object getDataBasedOnDataTypeForNoDictionaryColumn(byte[] dataInBytes, - DataType actualDataType) { + DataType actualDataType, boolean isTimeStampConversion) { --- End diff -- Add one more method to pass the `isTimeStampConversion` --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2417 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5453/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2417 SDV Build Failed, Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5500/ --- |
Free forum by Nabble | Edit this page |