Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200935214 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch throw new UnsupportedOperationException("internal error"); } - @Override - public byte[] getChunkData(int rowId) { - return columnPage.getBytes(rowId); + @Override public byte[] getChunkData(int rowId) { + ColumnType columnType = columnPage.getColumnSpec().getColumnType(); + DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType(); + DataType targetDataType = columnPage.getDataType(); + if (columnPage.getNullBits().get(rowId)) { + // if this row is null, return default null represent in byte array + return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; + } + if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) { + if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) { + double doubleData = columnPage.getDouble(rowId); + if (srcDataType == DataTypes.FLOAT) { + float out = (float) doubleData; + return ByteUtil.toBytes(out); + } else { + return ByteUtil.toBytes(doubleData); + } + } else if (DataTypes.isDecimal(srcDataType)) { + throw new RuntimeException("unsupported type: " + srcDataType); + } else if ((srcDataType == DataTypes.BYTE) || + (srcDataType == DataTypes.BOOLEAN) || + (srcDataType == DataTypes.SHORT) || + (srcDataType == DataTypes.SHORT_INT) || + (srcDataType == DataTypes.INT) || + (srcDataType == DataTypes.LONG) || + (srcDataType == DataTypes.TIMESTAMP)) { + long longData = columnPage.getLong(rowId); + if ((srcDataType == DataTypes.BYTE)) { + byte out = (byte) longData; --- End diff -- Should not convert to Long and then covert, it is overhead --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200938152 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch throw new UnsupportedOperationException("internal error"); } - @Override - public byte[] getChunkData(int rowId) { - return columnPage.getBytes(rowId); + @Override public byte[] getChunkData(int rowId) { + ColumnType columnType = columnPage.getColumnSpec().getColumnType(); + DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType(); + DataType targetDataType = columnPage.getDataType(); + if (columnPage.getNullBits().get(rowId)) { + // if this row is null, return default null represent in byte array + return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; + } + if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) { + if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) { + double doubleData = columnPage.getDouble(rowId); + if (srcDataType == DataTypes.FLOAT) { + float out = (float) doubleData; + return ByteUtil.toBytes(out); + } else { + return ByteUtil.toBytes(doubleData); + } + } else if (DataTypes.isDecimal(srcDataType)) { + throw new RuntimeException("unsupported type: " + srcDataType); + } else if ((srcDataType == DataTypes.BYTE) || + (srcDataType == DataTypes.BOOLEAN) || + (srcDataType == DataTypes.SHORT) || + (srcDataType == DataTypes.SHORT_INT) || + (srcDataType == DataTypes.INT) || + (srcDataType == DataTypes.LONG) || + (srcDataType == DataTypes.TIMESTAMP)) { + long longData = columnPage.getLong(rowId); + if ((srcDataType == DataTypes.BYTE)) { + byte out = (byte) longData; --- End diff -- Use Switch instead of if statement --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200962872 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch throw new UnsupportedOperationException("internal error"); } - @Override - public byte[] getChunkData(int rowId) { - return columnPage.getBytes(rowId); + @Override public byte[] getChunkData(int rowId) { + ColumnType columnType = columnPage.getColumnSpec().getColumnType(); + DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType(); + DataType targetDataType = columnPage.getDataType(); + if (columnPage.getNullBits().get(rowId)) { + // if this row is null, return default null represent in byte array + return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; + } + if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) { + if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) { + double doubleData = columnPage.getDouble(rowId); + if (srcDataType == DataTypes.FLOAT) { + float out = (float) doubleData; + return ByteUtil.toBytes(out); + } else { + return ByteUtil.toBytes(doubleData); + } + } else if (DataTypes.isDecimal(srcDataType)) { + throw new RuntimeException("unsupported type: " + srcDataType); + } else if ((srcDataType == DataTypes.BYTE) || + (srcDataType == DataTypes.BOOLEAN) || + (srcDataType == DataTypes.SHORT) || + (srcDataType == DataTypes.SHORT_INT) || + (srcDataType == DataTypes.INT) || + (srcDataType == DataTypes.LONG) || + (srcDataType == DataTypes.TIMESTAMP)) { + long longData = columnPage.getLong(rowId); + if ((srcDataType == DataTypes.BYTE)) { + byte out = (byte) longData; + return ByteUtil.toBytes(out); + } else if (srcDataType == DataTypes.BOOLEAN) { + byte out = (byte) longData; + return ByteUtil.toBytes(ByteUtil.toBoolean(out)); + } else if (srcDataType == DataTypes.SHORT) { + short out = (short) longData; + return ByteUtil.toBytes(out); + } else if (srcDataType == DataTypes.SHORT_INT) { + int out = (int) longData; + return ByteUtil.toBytes(out); + } else if (srcDataType == DataTypes.INT) { + int out = (int) longData; + return ByteUtil.toBytes(out); + } else { + // timestamp and long + return ByteUtil.toBytes(longData); + } + } else if ((targetDataType == DataTypes.STRING) || + (targetDataType == DataTypes.VARCHAR) || + (targetDataType == DataTypes.BYTE_ARRAY)) { + return columnPage.getBytes(rowId); + } else { + throw new RuntimeException("unsupported type: " + targetDataType); + } + } else if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && !this.isAdaptiveComplexPrimitive()) { + if ((srcDataType == DataTypes.BYTE) || (srcDataType == DataTypes.BOOLEAN)) { + byte[] out = new byte[1]; + out[0] = (columnPage.getByte(rowId)); + return out; + } else if (srcDataType == DataTypes.BYTE_ARRAY) { + return columnPage.getBytes(rowId); + } else { + throw new RuntimeException("unsupported type: " + targetDataType); + } + } else { + return columnPage.getBytes(rowId); --- End diff -- getBytes should not be default , it should throw exception, unsupported datatype --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200964076 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java --- @@ -141,7 +141,7 @@ private static ColumnPage createFixLengthByteArrayPage(TableSpec.ColumnSpec colu throw new RuntimeException(e); } } else { - return new SafeFixLengthColumnPage(columnSpec, dataType, pageSize, eachValueSize); + return new SafeFixLengthColumnPage(columnSpec, dataType, pageSize); --- End diff -- This should not be removed. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200967561 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/ComplexColumnPage.java --- @@ -63,25 +66,42 @@ public ComplexColumnPage(List<ColumnType> complexColumnType) { * below method will be used to initlize the column page of complex type * @param columnToDictMap * dictionary map - * @param columnNames - * list of columns * @param pageSize * number of records * @throws MemoryException * if memory is not sufficient */ - public void initialize(Map<String, LocalDictionaryGenerator> columnToDictMap, - List<String> columnNames, int pageSize) throws MemoryException { + public void initialize(Map<String, LocalDictionaryGenerator> columnToDictMap, int pageSize) + throws MemoryException { + DataType dataType; for (int i = 0; i < this.columnPages.length; i++) { - LocalDictionaryGenerator localDictionaryGenerator = columnToDictMap.get(columnNames.get(i)); + ComplexColumnInfo complexColumnInfo = complexColumnInfoList.get(i); + LocalDictionaryGenerator localDictionaryGenerator = + columnToDictMap.get(complexColumnInfo.getColumnNames()); if (null == localDictionaryGenerator) { - TableSpec.ColumnSpec spec = TableSpec.ColumnSpec - .newInstance(columnNames.get(i), DataTypes.BYTE_ARRAY, complexColumnType.get(i)); - this.columnPages[i] = ColumnPage.newPage(spec, DataTypes.BYTE_ARRAY, pageSize); - this.columnPages[i].setStatsCollector(new DummyStatsCollector()); + dataType = complexColumnInfo.getColumnDataTypes(); + if ((complexColumnInfo.isNoDictionary() && !((DataTypes.isStructType(dataType) || DataTypes + .isArrayType(dataType) || (dataType == DataTypes.STRING) || (dataType + == DataTypes.VARCHAR) || (dataType == DataTypes.DATE) || DataTypes + .isDecimal(dataType))))) { + TableSpec.ColumnSpec spec = TableSpec.ColumnSpec + .newInstance(complexColumnInfo.getColumnNames(), dataType, + complexColumnInfo.getComplexColumnType()); + // no dictionary primitive types need adaptive encoding, + // hence store as object instead of byte array + this.columnPages[i] = ColumnPage.newPage(spec, dataType, pageSize); + this.columnPages[i].setStatsCollector(PrimitivePageStatsCollector.newInstance(dataType)); + } else { + TableSpec.ColumnSpec spec = TableSpec.ColumnSpec --- End diff -- remove duplicate logic --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200984983 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java --- @@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk) public static EncodedColumnPage encodedColumn(ColumnPage page) throws IOException, MemoryException { - ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null); - return encoder.encode(page); + ColumnPageEncoder pageEncoder = createCodecForDimension(page); + if (pageEncoder == null) { + ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null); + return encoder.encode(page); + } else { + LOGGER.info("Encoder result ---> Source data type: " + pageEncoder.getEncoderMeta(page) --- End diff -- move to EncodeFactory --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200985232 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java --- @@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk) public static EncodedColumnPage encodedColumn(ColumnPage page) throws IOException, MemoryException { - ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null); - return encoder.encode(page); + ColumnPageEncoder pageEncoder = createCodecForDimension(page); --- End diff -- FallbackEncodedColumnPage is also using complex type so child column types loop logic not present. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200985673 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java --- @@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk) public static EncodedColumnPage encodedColumn(ColumnPage page) throws IOException, MemoryException { - ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null); - return encoder.encode(page); + ColumnPageEncoder pageEncoder = createCodecForDimension(page); + if (pageEncoder == null) { + ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null); + return encoder.encode(page); + } else { + LOGGER.info("Encoder result ---> Source data type: " + pageEncoder.getEncoderMeta(page) + .getColumnSpec().getSchemaDataType() + " Destination data type: " + pageEncoder + .getEncoderMeta(page).getStoreDataType() + " for the column: " + pageEncoder + .getEncoderMeta(page).getColumnSpec().getFieldName()); + // TODO: remove the Sout after testing, + // currently added as executor Info logs doesn't come in IDE + System.out.println("Encoder result ---> Source data type: " + pageEncoder.getEncoderMeta(page) + .getColumnSpec().getSchemaDataType() + " Destination data type: " + pageEncoder + .getEncoderMeta(page).getStoreDataType() + " for the column: " + pageEncoder + .getEncoderMeta(page).getColumnSpec().getFieldName()); + return pageEncoder.encode(page); + } } + private static ColumnPageEncoder createCodecForDimension(ColumnPage inputPage) { + TableSpec.ColumnSpec columnSpec = inputPage.getColumnSpec(); --- End diff -- move to EncodeFactory --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200986348 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java --- @@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk) public static EncodedColumnPage encodedColumn(ColumnPage page) throws IOException, MemoryException { - ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null); - return encoder.encode(page); + ColumnPageEncoder pageEncoder = createCodecForDimension(page); + if (pageEncoder == null) { + ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null); + return encoder.encode(page); + } else { + LOGGER.info("Encoder result ---> Source data type: " + pageEncoder.getEncoderMeta(page) + .getColumnSpec().getSchemaDataType() + " Destination data type: " + pageEncoder + .getEncoderMeta(page).getStoreDataType() + " for the column: " + pageEncoder + .getEncoderMeta(page).getColumnSpec().getFieldName()); + // TODO: remove the Sout after testing, + // currently added as executor Info logs doesn't come in IDE + System.out.println("Encoder result ---> Source data type: " + pageEncoder.getEncoderMeta(page) + .getColumnSpec().getSchemaDataType() + " Destination data type: " + pageEncoder + .getEncoderMeta(page).getStoreDataType() + " for the column: " + pageEncoder + .getEncoderMeta(page).getColumnSpec().getFieldName()); + return pageEncoder.encode(page); + } } + private static ColumnPageEncoder createCodecForDimension(ColumnPage inputPage) { + TableSpec.ColumnSpec columnSpec = inputPage.getColumnSpec(); + if (columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE) { + if (inputPage.getDataType() == DataTypes.BYTE_ARRAY + || inputPage.getDataType() == DataTypes.STRING) { + // use legacy encoder + return null; + } else if ((inputPage.getDataType() == DataTypes.BYTE) || (inputPage.getDataType() + == DataTypes.SHORT) || (inputPage.getDataType() == DataTypes.INT) || ( + inputPage.getDataType() == DataTypes.LONG)) { + return selectCodecByAlgorithmForIntegral(inputPage.getStatistics(), true) + .createEncoder(null); + } else if ((inputPage.getDataType() == DataTypes.FLOAT) || (inputPage.getDataType() + == DataTypes.DOUBLE)) { + return selectCodecByAlgorithmForFloating(inputPage.getStatistics(), true) + .createEncoder(null); + } + } + // use legacy encoder + return null; --- End diff -- legacy encoder logic should also be here --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200989677 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java --- @@ -93,9 +93,9 @@ public PrimitiveQueryType(String name, String parentname, int blockIndex, return 1; } - @Override public void parseBlocksAndReturnComplexColumnByteArray( - DimensionRawColumnChunk[] rawColumnChunks, int rowNumber, - int pageNumber, DataOutputStream dataOutputStream) throws IOException { + @Override + public void parseBlocksAndReturnComplexColumnByteArray(DimensionRawColumnChunk[] rawColumnChunks, --- End diff -- No change revert changes --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200989973 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java --- @@ -870,4 +873,23 @@ private static void getChildDimensionOrdinal(CarbonDimension queryDimensions, } } } + + /** --- End diff -- Move to right location, not in query util --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200991672 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestAdaptiveEncodingForNullValues.scala --- @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.spark.testsuite.dataload + +import scala.collection.mutable + +import org.apache.spark.sql.Row +import org.apache.spark.sql.test.util.QueryTest +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties + +/** + * Test class of Adaptive Encoding UnSafe Column Page with Null values + * + */ + +class TestAdaptiveEncodingForNullValues + extends QueryTest with BeforeAndAfterAll { + + override def beforeAll(): Unit = { + sql("DROP TABLE IF EXISTS adaptive") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE, + "true") + } + + override def afterAll(): Unit = { + sql("DROP TABLE IF EXISTS adaptive") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE, + "true") + } + + test("test INT with struct and array, Encoding INT-->BYTE") { + sql("Drop table if exists adaptive") + sql( + "create table adaptive(roll int, student struct<id:int,name:string,marks:array<int>>) " + + "stored by 'carbondata'") + sql("insert into adaptive values(1,'null$abc$null:null:null')") + checkAnswer(sql("select * from adaptive"), + Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, null, null)))))) + } + + + test("test SMALLINT with struct and array SMALLINT --> BYTE") { + sql("Drop table if exists adaptive") + sql( + "create table adaptive(roll int, student struct<id:smallint,name:string," + + "marks:array<smallint>>) stored by 'carbondata'") + sql("insert into adaptive values(1,'null$abc$null:null:null')") + checkAnswer(sql("select * from adaptive"), + Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, null, null)))))) + } + + + test("test BigInt with struct and array BIGINT --> BYTE") { + sql("Drop table if exists adaptive") + sql( + "create table adaptive(roll int, student struct<id:bigint,name:string," + + "marks:array<bigint>>) stored by 'carbondata'") + sql("insert into adaptive values(1,'null$abc$null:null:null')") + checkAnswer(sql("select * from adaptive"), + Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, null, null)))))) + } + + test("test Double with Struct and Array DOUBLE --> BYTE") { + sql("Drop table if exists adaptive") + sql( + "create table adaptive(roll int, student struct<id:double,name:string," + + "marks:array<double>>) stored by 'carbondata'") + sql("insert into adaptive values(1,'null$abc$null:null:null')") + checkAnswer(sql("select * from adaptive"), + Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, null, null)))))) + } + + test("test Decimal with Struct") { + sql("Drop table if exists adaptive") + sql( + "create table adaptive(roll int, student struct<id:decimal(3,2),name:string," + + "marks:array<decimal>>) stored by " + + "'carbondata'") + sql("insert into adaptive values(1,'null$abc$null:null:null')") + checkAnswer(sql("select * from adaptive"), + Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, null, null)))))) + } + + test("test Timestamp with Struct") { + sql("Drop table if exists adaptive") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd") + sql( + "create table adaptive(roll int, student struct<id:timestamp,name:string>) stored by " + + "'carbondata'") + sql("insert into adaptive values(1,'null$abc')") + checkAnswer(sql("select * from adaptive"), + Seq(Row(1, Row(null, "abc")))) + } + + test("test Timestamp with Array") { + sql("Drop table if exists adaptive") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd") + sql( + "create table adaptive(roll int, student struct<name:string," + + "marks:array<timestamp>>) stored by 'carbondata'") + sql("insert into adaptive values(1,'abc$null:null:null')") + checkAnswer(sql("select * from adaptive"), + Seq(Row(1, Row("abc", mutable.WrappedArray.make(Array(null, null, null)))))) + } + + test("test DATE with Array") { --- End diff -- Write logic check it column is encoded as adaptive or not --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r200992646 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestAdaptiveEncodingUnsafeColumnPageForComplexDataType.scala --- @@ -0,0 +1,588 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.spark.testsuite.dataload + +import java.io.{File, PrintWriter} +import java.sql.Timestamp + +import scala.collection.mutable +import scala.util.Random + +import org.apache.spark.sql.Row +import org.apache.spark.sql.test.util.QueryTest +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties + +/** + * Test class of Adaptive Encoding UnSafe Column Page with Complex Data type + * + */ + +class TestAdaptiveEncodingUnsafeColumnPageForComplexDataType + extends QueryTest with BeforeAndAfterAll { + + override def beforeAll(): Unit = { + + new File(CarbonProperties.getInstance().getSystemFolderLocation).delete() + sql("DROP TABLE IF EXISTS adaptive") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE, + "true") + } + + override def afterAll(): Unit = { + sql("DROP TABLE IF EXISTS adaptive") + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE, + "true") + } + + test("test INT with struct and array, Encoding INT-->BYTE") { --- End diff -- 1) Why testcases change for unsafe safe, derive once class from other and pass parameters 2) Write function to reduce duplication of logic among testcases --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r201073104 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -17,32 +17,39 @@ package org.apache.carbondata.core.datastore.chunk.store; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.ColumnType; import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage; import org.apache.carbondata.core.datastore.page.ColumnPage; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo; import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.util.ByteUtil; public class ColumnPageWrapper implements DimensionColumnPage { private ColumnPage columnPage; - public ColumnPageWrapper(ColumnPage columnPage) { + private boolean isAdaptiveComplexPrimitivePage; + + public ColumnPageWrapper(ColumnPage columnPage, boolean isAdaptiveComplexPrimitivePage) { this.columnPage = columnPage; + this.isAdaptiveComplexPrimitivePage = isAdaptiveComplexPrimitivePage; } @Override public int fillRawData(int rowId, int offset, byte[] data, KeyStructureInfo restructuringInfo) { throw new UnsupportedOperationException("internal error"); } - @Override - public int fillSurrogateKey(int rowId, int chunkIndex, int[] outputSurrogateKey, + @Override public int fillSurrogateKey(int rowId, int chunkIndex, int[] outputSurrogateKey, --- End diff -- Done --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r201197596 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch throw new UnsupportedOperationException("internal error"); } - @Override - public byte[] getChunkData(int rowId) { - return columnPage.getBytes(rowId); + @Override public byte[] getChunkData(int rowId) { --- End diff -- Currently, Struct and Array types are stored as Byte Array. But as they are storing in LV format it can be modified and fit in V format. This refactoring will be done as part of jira CARBONDATA-2605 --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r201197871 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch throw new UnsupportedOperationException("internal error"); } - @Override - public byte[] getChunkData(int rowId) { - return columnPage.getBytes(rowId); + @Override public byte[] getChunkData(int rowId) { + ColumnType columnType = columnPage.getColumnSpec().getColumnType(); + DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType(); + DataType targetDataType = columnPage.getDataType(); + if (columnPage.getNullBits().get(rowId)) { + // if this row is null, return default null represent in byte array + return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; + } + if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) { + if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) { + double doubleData = columnPage.getDouble(rowId); + if (srcDataType == DataTypes.FLOAT) { + float out = (float) doubleData; + return ByteUtil.toBytes(out); + } else { + return ByteUtil.toBytes(doubleData); + } + } else if (DataTypes.isDecimal(srcDataType)) { + throw new RuntimeException("unsupported type: " + srcDataType); --- End diff -- Decimal type is supported inside complex. But currently, it avoids the Adaptive Encoding path and goes as Byte Array Format. This will be modified in CARBONDATA-2605. --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r201198855 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch throw new UnsupportedOperationException("internal error"); } - @Override - public byte[] getChunkData(int rowId) { - return columnPage.getBytes(rowId); + @Override public byte[] getChunkData(int rowId) { + ColumnType columnType = columnPage.getColumnSpec().getColumnType(); + DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType(); + DataType targetDataType = columnPage.getDataType(); + if (columnPage.getNullBits().get(rowId)) { + // if this row is null, return default null represent in byte array + return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; + } + if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) { + if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) { + double doubleData = columnPage.getDouble(rowId); + if (srcDataType == DataTypes.FLOAT) { + float out = (float) doubleData; + return ByteUtil.toBytes(out); + } else { + return ByteUtil.toBytes(doubleData); + } + } else if (DataTypes.isDecimal(srcDataType)) { + throw new RuntimeException("unsupported type: " + srcDataType); + } else if ((srcDataType == DataTypes.BYTE) || + (srcDataType == DataTypes.BOOLEAN) || + (srcDataType == DataTypes.SHORT) || + (srcDataType == DataTypes.SHORT_INT) || + (srcDataType == DataTypes.INT) || + (srcDataType == DataTypes.LONG) || + (srcDataType == DataTypes.TIMESTAMP)) { + long longData = columnPage.getLong(rowId); + if ((srcDataType == DataTypes.BYTE)) { + byte out = (byte) longData; --- End diff -- Placing a switch statement is not a small change and will impact many other files and classes. As this is not a functional issue postposing this to CARBONDATA-2713. --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r201199734 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java --- @@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk) public static EncodedColumnPage encodedColumn(ColumnPage page) throws IOException, MemoryException { - ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null); - return encoder.encode(page); + ColumnPageEncoder pageEncoder = createCodecForDimension(page); + if (pageEncoder == null) { + ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null); + return encoder.encode(page); + } else { + LOGGER.info("Encoder result ---> Source data type: " + pageEncoder.getEncoderMeta(page) --- End diff -- Done --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r201199788 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java --- @@ -141,7 +141,7 @@ private static ColumnPage createFixLengthByteArrayPage(TableSpec.ColumnSpec colu throw new RuntimeException(e); } } else { - return new SafeFixLengthColumnPage(columnSpec, dataType, pageSize, eachValueSize); + return new SafeFixLengthColumnPage(columnSpec, dataType, pageSize); --- End diff -- Done --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2417#discussion_r201221334 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java --- @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch throw new UnsupportedOperationException("internal error"); } - @Override - public byte[] getChunkData(int rowId) { - return columnPage.getBytes(rowId); + @Override public byte[] getChunkData(int rowId) { + ColumnType columnType = columnPage.getColumnSpec().getColumnType(); + DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType(); + DataType targetDataType = columnPage.getDataType(); + if (columnPage.getNullBits().get(rowId)) { + // if this row is null, return default null represent in byte array + return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; + } + if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) { + if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) { + double doubleData = columnPage.getDouble(rowId); + if (srcDataType == DataTypes.FLOAT) { + float out = (float) doubleData; + return ByteUtil.toBytes(out); + } else { + return ByteUtil.toBytes(doubleData); + } + } else if (DataTypes.isDecimal(srcDataType)) { + throw new RuntimeException("unsupported type: " + srcDataType); + } else if ((srcDataType == DataTypes.BYTE) || + (srcDataType == DataTypes.BOOLEAN) || + (srcDataType == DataTypes.SHORT) || + (srcDataType == DataTypes.SHORT_INT) || + (srcDataType == DataTypes.INT) || + (srcDataType == DataTypes.LONG) || + (srcDataType == DataTypes.TIMESTAMP)) { + long longData = columnPage.getLong(rowId); + if ((srcDataType == DataTypes.BYTE)) { + byte out = (byte) longData; + return ByteUtil.toBytes(out); + } else if (srcDataType == DataTypes.BOOLEAN) { + byte out = (byte) longData; + return ByteUtil.toBytes(ByteUtil.toBoolean(out)); + } else if (srcDataType == DataTypes.SHORT) { + short out = (short) longData; + return ByteUtil.toBytes(out); + } else if (srcDataType == DataTypes.SHORT_INT) { + int out = (int) longData; + return ByteUtil.toBytes(out); + } else if (srcDataType == DataTypes.INT) { + int out = (int) longData; + return ByteUtil.toBytes(out); + } else { + // timestamp and long + return ByteUtil.toBytes(longData); + } + } else if ((targetDataType == DataTypes.STRING) || + (targetDataType == DataTypes.VARCHAR) || + (targetDataType == DataTypes.BYTE_ARRAY)) { + return columnPage.getBytes(rowId); + } else { + throw new RuntimeException("unsupported type: " + targetDataType); + } + } else if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && !this.isAdaptiveComplexPrimitive()) { + if ((srcDataType == DataTypes.BYTE) || (srcDataType == DataTypes.BOOLEAN)) { + byte[] out = new byte[1]; + out[0] = (columnPage.getByte(rowId)); + return out; + } else if (srcDataType == DataTypes.BYTE_ARRAY) { + return columnPage.getBytes(rowId); + } else { + throw new RuntimeException("unsupported type: " + targetDataType); + } + } else { + return columnPage.getBytes(rowId); --- End diff -- This getBytes is only for Complex STRUCT and ARRAY type which are in Byte Array Format. Later when CARBONDATA-2713 turns STRUCT and ARRAY to V-format then this check will throw exception --- |
Free forum by Nabble | Edit this page |