[GitHub] carbondata pull request #2417: [WIP][Complex Column Enhancements]Primitive D...

classic Classic list List threaded Threaded
181 messages Options
1 ... 5678910
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200935214
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java ---
    @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
         throw new UnsupportedOperationException("internal error");
       }
     
    -  @Override
    -  public byte[] getChunkData(int rowId) {
    -    return columnPage.getBytes(rowId);
    +  @Override public byte[] getChunkData(int rowId) {
    +    ColumnType columnType = columnPage.getColumnSpec().getColumnType();
    +    DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
    +    DataType targetDataType = columnPage.getDataType();
    +    if (columnPage.getNullBits().get(rowId)) {
    +      // if this row is null, return default null represent in byte array
    +      return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
    +    }
    +    if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) {
    +      if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) {
    +        double doubleData = columnPage.getDouble(rowId);
    +        if (srcDataType == DataTypes.FLOAT) {
    +          float out = (float) doubleData;
    +          return ByteUtil.toBytes(out);
    +        } else {
    +          return ByteUtil.toBytes(doubleData);
    +        }
    +      } else if (DataTypes.isDecimal(srcDataType)) {
    +        throw new RuntimeException("unsupported type: " + srcDataType);
    +      } else if ((srcDataType == DataTypes.BYTE) ||
    +          (srcDataType == DataTypes.BOOLEAN) ||
    +          (srcDataType == DataTypes.SHORT) ||
    +          (srcDataType == DataTypes.SHORT_INT) ||
    +          (srcDataType == DataTypes.INT) ||
    +          (srcDataType == DataTypes.LONG) ||
    +          (srcDataType == DataTypes.TIMESTAMP)) {
    +        long longData = columnPage.getLong(rowId);
    +        if ((srcDataType == DataTypes.BYTE)) {
    +          byte out = (byte) longData;
    --- End diff --
   
    Should not convert to Long and then covert, it is overhead


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200938152
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java ---
    @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
         throw new UnsupportedOperationException("internal error");
       }
     
    -  @Override
    -  public byte[] getChunkData(int rowId) {
    -    return columnPage.getBytes(rowId);
    +  @Override public byte[] getChunkData(int rowId) {
    +    ColumnType columnType = columnPage.getColumnSpec().getColumnType();
    +    DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
    +    DataType targetDataType = columnPage.getDataType();
    +    if (columnPage.getNullBits().get(rowId)) {
    +      // if this row is null, return default null represent in byte array
    +      return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
    +    }
    +    if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) {
    +      if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) {
    +        double doubleData = columnPage.getDouble(rowId);
    +        if (srcDataType == DataTypes.FLOAT) {
    +          float out = (float) doubleData;
    +          return ByteUtil.toBytes(out);
    +        } else {
    +          return ByteUtil.toBytes(doubleData);
    +        }
    +      } else if (DataTypes.isDecimal(srcDataType)) {
    +        throw new RuntimeException("unsupported type: " + srcDataType);
    +      } else if ((srcDataType == DataTypes.BYTE) ||
    +          (srcDataType == DataTypes.BOOLEAN) ||
    +          (srcDataType == DataTypes.SHORT) ||
    +          (srcDataType == DataTypes.SHORT_INT) ||
    +          (srcDataType == DataTypes.INT) ||
    +          (srcDataType == DataTypes.LONG) ||
    +          (srcDataType == DataTypes.TIMESTAMP)) {
    +        long longData = columnPage.getLong(rowId);
    +        if ((srcDataType == DataTypes.BYTE)) {
    +          byte out = (byte) longData;
    --- End diff --
   
    Use Switch instead of if statement


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200962872
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java ---
    @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
         throw new UnsupportedOperationException("internal error");
       }
     
    -  @Override
    -  public byte[] getChunkData(int rowId) {
    -    return columnPage.getBytes(rowId);
    +  @Override public byte[] getChunkData(int rowId) {
    +    ColumnType columnType = columnPage.getColumnSpec().getColumnType();
    +    DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
    +    DataType targetDataType = columnPage.getDataType();
    +    if (columnPage.getNullBits().get(rowId)) {
    +      // if this row is null, return default null represent in byte array
    +      return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
    +    }
    +    if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) {
    +      if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) {
    +        double doubleData = columnPage.getDouble(rowId);
    +        if (srcDataType == DataTypes.FLOAT) {
    +          float out = (float) doubleData;
    +          return ByteUtil.toBytes(out);
    +        } else {
    +          return ByteUtil.toBytes(doubleData);
    +        }
    +      } else if (DataTypes.isDecimal(srcDataType)) {
    +        throw new RuntimeException("unsupported type: " + srcDataType);
    +      } else if ((srcDataType == DataTypes.BYTE) ||
    +          (srcDataType == DataTypes.BOOLEAN) ||
    +          (srcDataType == DataTypes.SHORT) ||
    +          (srcDataType == DataTypes.SHORT_INT) ||
    +          (srcDataType == DataTypes.INT) ||
    +          (srcDataType == DataTypes.LONG) ||
    +          (srcDataType == DataTypes.TIMESTAMP)) {
    +        long longData = columnPage.getLong(rowId);
    +        if ((srcDataType == DataTypes.BYTE)) {
    +          byte out = (byte) longData;
    +          return ByteUtil.toBytes(out);
    +        } else if (srcDataType == DataTypes.BOOLEAN) {
    +          byte out = (byte) longData;
    +          return ByteUtil.toBytes(ByteUtil.toBoolean(out));
    +        } else if (srcDataType == DataTypes.SHORT) {
    +          short out = (short) longData;
    +          return ByteUtil.toBytes(out);
    +        } else if (srcDataType == DataTypes.SHORT_INT) {
    +          int out = (int) longData;
    +          return ByteUtil.toBytes(out);
    +        } else if (srcDataType == DataTypes.INT) {
    +          int out = (int) longData;
    +          return ByteUtil.toBytes(out);
    +        } else {
    +          // timestamp and long
    +          return ByteUtil.toBytes(longData);
    +        }
    +      } else if ((targetDataType == DataTypes.STRING) ||
    +          (targetDataType == DataTypes.VARCHAR) ||
    +          (targetDataType == DataTypes.BYTE_ARRAY)) {
    +        return columnPage.getBytes(rowId);
    +      } else {
    +        throw new RuntimeException("unsupported type: " + targetDataType);
    +      }
    +    } else if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && !this.isAdaptiveComplexPrimitive()) {
    +      if ((srcDataType == DataTypes.BYTE) || (srcDataType == DataTypes.BOOLEAN)) {
    +        byte[] out = new byte[1];
    +        out[0] = (columnPage.getByte(rowId));
    +        return out;
    +      } else if (srcDataType == DataTypes.BYTE_ARRAY) {
    +        return columnPage.getBytes(rowId);
    +      } else {
    +        throw new RuntimeException("unsupported type: " + targetDataType);
    +      }
    +    } else {
    +      return columnPage.getBytes(rowId);
    --- End diff --
   
    getBytes should not be default , it should throw exception, unsupported datatype


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200964076
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java ---
    @@ -141,7 +141,7 @@ private static ColumnPage createFixLengthByteArrayPage(TableSpec.ColumnSpec colu
             throw new RuntimeException(e);
           }
         } else {
    -      return new SafeFixLengthColumnPage(columnSpec, dataType, pageSize, eachValueSize);
    +      return new SafeFixLengthColumnPage(columnSpec, dataType, pageSize);
    --- End diff --
   
    This should not be removed.


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200967561
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/ComplexColumnPage.java ---
    @@ -63,25 +66,42 @@ public ComplexColumnPage(List<ColumnType> complexColumnType) {
        * below method will be used to initlize the column page of complex type
        * @param columnToDictMap
        * dictionary map
    -   * @param columnNames
    -   * list of columns
        * @param pageSize
        * number of records
        * @throws MemoryException
        * if memory is not sufficient
        */
    -  public void initialize(Map<String, LocalDictionaryGenerator> columnToDictMap,
    -      List<String> columnNames, int pageSize) throws MemoryException {
    +  public void initialize(Map<String, LocalDictionaryGenerator> columnToDictMap, int pageSize)
    +      throws MemoryException {
    +    DataType dataType;
         for (int i = 0; i < this.columnPages.length; i++) {
    -      LocalDictionaryGenerator localDictionaryGenerator = columnToDictMap.get(columnNames.get(i));
    +      ComplexColumnInfo complexColumnInfo = complexColumnInfoList.get(i);
    +      LocalDictionaryGenerator localDictionaryGenerator =
    +          columnToDictMap.get(complexColumnInfo.getColumnNames());
           if (null == localDictionaryGenerator) {
    -        TableSpec.ColumnSpec spec = TableSpec.ColumnSpec
    -            .newInstance(columnNames.get(i), DataTypes.BYTE_ARRAY, complexColumnType.get(i));
    -        this.columnPages[i] = ColumnPage.newPage(spec, DataTypes.BYTE_ARRAY, pageSize);
    -        this.columnPages[i].setStatsCollector(new DummyStatsCollector());
    +        dataType = complexColumnInfo.getColumnDataTypes();
    +        if ((complexColumnInfo.isNoDictionary() && !((DataTypes.isStructType(dataType) || DataTypes
    +            .isArrayType(dataType) || (dataType == DataTypes.STRING) || (dataType
    +            == DataTypes.VARCHAR) || (dataType == DataTypes.DATE) || DataTypes
    +            .isDecimal(dataType))))) {
    +          TableSpec.ColumnSpec spec = TableSpec.ColumnSpec
    +              .newInstance(complexColumnInfo.getColumnNames(), dataType,
    +                  complexColumnInfo.getComplexColumnType());
    +          // no dictionary primitive types need adaptive encoding,
    +          // hence store as object instead of byte array
    +          this.columnPages[i] = ColumnPage.newPage(spec, dataType, pageSize);
    +          this.columnPages[i].setStatsCollector(PrimitivePageStatsCollector.newInstance(dataType));
    +        } else {
    +          TableSpec.ColumnSpec spec = TableSpec.ColumnSpec
    --- End diff --
   
    remove duplicate logic


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200984983
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java ---
    @@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
     
       public static EncodedColumnPage encodedColumn(ColumnPage page)
           throws IOException, MemoryException {
    -    ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
    -    return encoder.encode(page);
    +    ColumnPageEncoder pageEncoder = createCodecForDimension(page);
    +    if (pageEncoder == null) {
    +      ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
    +      return encoder.encode(page);
    +    } else {
    +      LOGGER.info("Encoder result ---> Source data type: " + pageEncoder.getEncoderMeta(page)
    --- End diff --
   
    move to EncodeFactory


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200985232
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java ---
    @@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
     
       public static EncodedColumnPage encodedColumn(ColumnPage page)
           throws IOException, MemoryException {
    -    ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
    -    return encoder.encode(page);
    +    ColumnPageEncoder pageEncoder = createCodecForDimension(page);
    --- End diff --
   
    FallbackEncodedColumnPage is also using complex type so child column types loop logic not present.


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200985673
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java ---
    @@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
     
       public static EncodedColumnPage encodedColumn(ColumnPage page)
           throws IOException, MemoryException {
    -    ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
    -    return encoder.encode(page);
    +    ColumnPageEncoder pageEncoder = createCodecForDimension(page);
    +    if (pageEncoder == null) {
    +      ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
    +      return encoder.encode(page);
    +    } else {
    +      LOGGER.info("Encoder result ---> Source data type: " + pageEncoder.getEncoderMeta(page)
    +          .getColumnSpec().getSchemaDataType() + " Destination data type: " + pageEncoder
    +          .getEncoderMeta(page).getStoreDataType() + " for the column: " + pageEncoder
    +          .getEncoderMeta(page).getColumnSpec().getFieldName());
    +      // TODO: remove the Sout after testing,
    +      // currently added as executor Info logs doesn't come in IDE
    +      System.out.println("Encoder result ---> Source data type: " + pageEncoder.getEncoderMeta(page)
    +          .getColumnSpec().getSchemaDataType() + " Destination data type: " + pageEncoder
    +          .getEncoderMeta(page).getStoreDataType() + " for the column: " + pageEncoder
    +          .getEncoderMeta(page).getColumnSpec().getFieldName());
    +      return pageEncoder.encode(page);
    +    }
       }
     
    +  private static ColumnPageEncoder createCodecForDimension(ColumnPage inputPage) {
    +    TableSpec.ColumnSpec columnSpec = inputPage.getColumnSpec();
    --- End diff --
   
    move to EncodeFactory


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200986348
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java ---
    @@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
     
       public static EncodedColumnPage encodedColumn(ColumnPage page)
           throws IOException, MemoryException {
    -    ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
    -    return encoder.encode(page);
    +    ColumnPageEncoder pageEncoder = createCodecForDimension(page);
    +    if (pageEncoder == null) {
    +      ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
    +      return encoder.encode(page);
    +    } else {
    +      LOGGER.info("Encoder result ---> Source data type: " + pageEncoder.getEncoderMeta(page)
    +          .getColumnSpec().getSchemaDataType() + " Destination data type: " + pageEncoder
    +          .getEncoderMeta(page).getStoreDataType() + " for the column: " + pageEncoder
    +          .getEncoderMeta(page).getColumnSpec().getFieldName());
    +      // TODO: remove the Sout after testing,
    +      // currently added as executor Info logs doesn't come in IDE
    +      System.out.println("Encoder result ---> Source data type: " + pageEncoder.getEncoderMeta(page)
    +          .getColumnSpec().getSchemaDataType() + " Destination data type: " + pageEncoder
    +          .getEncoderMeta(page).getStoreDataType() + " for the column: " + pageEncoder
    +          .getEncoderMeta(page).getColumnSpec().getFieldName());
    +      return pageEncoder.encode(page);
    +    }
       }
     
    +  private static ColumnPageEncoder createCodecForDimension(ColumnPage inputPage) {
    +    TableSpec.ColumnSpec columnSpec = inputPage.getColumnSpec();
    +    if (columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE) {
    +      if (inputPage.getDataType() == DataTypes.BYTE_ARRAY
    +          || inputPage.getDataType() == DataTypes.STRING) {
    +        // use legacy encoder
    +        return null;
    +      } else if ((inputPage.getDataType() == DataTypes.BYTE) || (inputPage.getDataType()
    +          == DataTypes.SHORT) || (inputPage.getDataType() == DataTypes.INT) || (
    +          inputPage.getDataType() == DataTypes.LONG)) {
    +        return selectCodecByAlgorithmForIntegral(inputPage.getStatistics(), true)
    +            .createEncoder(null);
    +      } else if ((inputPage.getDataType() == DataTypes.FLOAT) || (inputPage.getDataType()
    +          == DataTypes.DOUBLE)) {
    +        return selectCodecByAlgorithmForFloating(inputPage.getStatistics(), true)
    +            .createEncoder(null);
    +      }
    +    }
    +    // use legacy encoder
    +    return null;
    --- End diff --
   
    legacy encoder  logic should also be here


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200989677
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java ---
    @@ -93,9 +93,9 @@ public PrimitiveQueryType(String name, String parentname, int blockIndex,
         return 1;
       }
     
    -  @Override public void parseBlocksAndReturnComplexColumnByteArray(
    -      DimensionRawColumnChunk[] rawColumnChunks, int rowNumber,
    -      int pageNumber, DataOutputStream dataOutputStream) throws IOException {
    +  @Override
    +  public void parseBlocksAndReturnComplexColumnByteArray(DimensionRawColumnChunk[] rawColumnChunks,
    --- End diff --
   
    No change revert changes


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200989973
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java ---
    @@ -870,4 +873,23 @@ private static void getChildDimensionOrdinal(CarbonDimension queryDimensions,
           }
         }
       }
    +
    +  /**
    --- End diff --
   
    Move to right location, not in query util


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200991672
 
    --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestAdaptiveEncodingForNullValues.scala ---
    @@ -0,0 +1,168 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.spark.testsuite.dataload
    +
    +import scala.collection.mutable
    +
    +import org.apache.spark.sql.Row
    +import org.apache.spark.sql.test.util.QueryTest
    +import org.scalatest.BeforeAndAfterAll
    +
    +import org.apache.carbondata.core.constants.CarbonCommonConstants
    +import org.apache.carbondata.core.util.CarbonProperties
    +
    +/**
    + * Test class of Adaptive Encoding UnSafe Column Page with Null values
    + *
    + */
    +
    +class TestAdaptiveEncodingForNullValues
    +  extends QueryTest with BeforeAndAfterAll {
    +
    +  override def beforeAll(): Unit = {
    +    sql("DROP TABLE IF EXISTS adaptive")
    +    CarbonProperties.getInstance()
    +      .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE,
    +        "true")
    +  }
    +
    +  override def afterAll(): Unit = {
    +    sql("DROP TABLE IF EXISTS adaptive")
    +    CarbonProperties.getInstance()
    +      .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE,
    +        "true")
    +  }
    +
    +  test("test INT with struct and array, Encoding INT-->BYTE") {
    +    sql("Drop table if exists adaptive")
    +    sql(
    +      "create table adaptive(roll int, student struct<id:int,name:string,marks:array<int>>) " +
    +      "stored by 'carbondata'")
    +    sql("insert into adaptive values(1,'null$abc$null:null:null')")
    +    checkAnswer(sql("select * from adaptive"),
    +      Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, null, null))))))
    +  }
    +
    +
    +  test("test SMALLINT with struct and array SMALLINT --> BYTE") {
    +    sql("Drop table if exists adaptive")
    +    sql(
    +      "create table adaptive(roll int, student struct<id:smallint,name:string," +
    +      "marks:array<smallint>>) stored by 'carbondata'")
    +    sql("insert into adaptive values(1,'null$abc$null:null:null')")
    +    checkAnswer(sql("select * from adaptive"),
    +      Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, null, null))))))
    +  }
    +
    +
    +  test("test BigInt with struct and array BIGINT --> BYTE") {
    +    sql("Drop table if exists adaptive")
    +    sql(
    +      "create table adaptive(roll int, student struct<id:bigint,name:string," +
    +      "marks:array<bigint>>) stored by 'carbondata'")
    +    sql("insert into adaptive values(1,'null$abc$null:null:null')")
    +    checkAnswer(sql("select * from adaptive"),
    +      Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, null, null))))))
    +  }
    +
    +  test("test Double with Struct and Array DOUBLE --> BYTE") {
    +    sql("Drop table if exists adaptive")
    +    sql(
    +      "create table adaptive(roll int, student struct<id:double,name:string," +
    +      "marks:array<double>>) stored by 'carbondata'")
    +    sql("insert into adaptive values(1,'null$abc$null:null:null')")
    +    checkAnswer(sql("select * from adaptive"),
    +      Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, null, null))))))
    +  }
    +
    +  test("test Decimal with Struct") {
    +    sql("Drop table if exists adaptive")
    +    sql(
    +      "create table adaptive(roll int, student struct<id:decimal(3,2),name:string," +
    +      "marks:array<decimal>>) stored by " +
    +      "'carbondata'")
    +    sql("insert into adaptive values(1,'null$abc$null:null:null')")
    +    checkAnswer(sql("select * from adaptive"),
    +      Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, null, null))))))
    +  }
    +
    +  test("test Timestamp with Struct") {
    +    sql("Drop table if exists adaptive")
    +    CarbonProperties.getInstance()
    +      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
    +    sql(
    +      "create table adaptive(roll int, student struct<id:timestamp,name:string>) stored by " +
    +      "'carbondata'")
    +    sql("insert into adaptive values(1,'null$abc')")
    +    checkAnswer(sql("select * from adaptive"),
    +      Seq(Row(1, Row(null, "abc"))))
    +  }
    +
    +  test("test Timestamp with Array") {
    +    sql("Drop table if exists adaptive")
    +    CarbonProperties.getInstance()
    +      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
    +    sql(
    +      "create table adaptive(roll int, student struct<name:string," +
    +      "marks:array<timestamp>>) stored by 'carbondata'")
    +    sql("insert into adaptive values(1,'abc$null:null:null')")
    +    checkAnswer(sql("select * from adaptive"),
    +      Seq(Row(1, Row("abc", mutable.WrappedArray.make(Array(null, null, null))))))
    +  }
    +
    +  test("test DATE with Array") {
    --- End diff --
   
    Write logic check it column is encoded as adaptive or not


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r200992646
 
    --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestAdaptiveEncodingUnsafeColumnPageForComplexDataType.scala ---
    @@ -0,0 +1,588 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.spark.testsuite.dataload
    +
    +import java.io.{File, PrintWriter}
    +import java.sql.Timestamp
    +
    +import scala.collection.mutable
    +import scala.util.Random
    +
    +import org.apache.spark.sql.Row
    +import org.apache.spark.sql.test.util.QueryTest
    +import org.scalatest.BeforeAndAfterAll
    +
    +import org.apache.carbondata.core.constants.CarbonCommonConstants
    +import org.apache.carbondata.core.util.CarbonProperties
    +
    +/**
    + * Test class of Adaptive Encoding UnSafe Column Page with Complex Data type
    + *
    + */
    +
    +class TestAdaptiveEncodingUnsafeColumnPageForComplexDataType
    +  extends QueryTest with BeforeAndAfterAll {
    +
    +  override def beforeAll(): Unit = {
    +
    +    new File(CarbonProperties.getInstance().getSystemFolderLocation).delete()
    +    sql("DROP TABLE IF EXISTS adaptive")
    +    CarbonProperties.getInstance()
    +      .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE,
    +        "true")
    +  }
    +
    +  override def afterAll(): Unit = {
    +    sql("DROP TABLE IF EXISTS adaptive")
    +    CarbonProperties.getInstance()
    +      .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE,
    +        "true")
    +  }
    +
    +  test("test INT with struct and array, Encoding INT-->BYTE") {
    --- End diff --
   
    1) Why testcases change for unsafe safe, derive once class from other and pass parameters
    2) Write function to reduce duplication of logic among testcases


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r201073104
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java ---
    @@ -17,32 +17,39 @@
     
     package org.apache.carbondata.core.datastore.chunk.store;
     
    +
    +import org.apache.carbondata.core.constants.CarbonCommonConstants;
    +import org.apache.carbondata.core.datastore.ColumnType;
     import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
     import org.apache.carbondata.core.datastore.page.ColumnPage;
    +import org.apache.carbondata.core.metadata.datatype.DataType;
    +import org.apache.carbondata.core.metadata.datatype.DataTypes;
     import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo;
     import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
    +import org.apache.carbondata.core.util.ByteUtil;
     
     public class ColumnPageWrapper implements DimensionColumnPage {
     
       private ColumnPage columnPage;
     
    -  public ColumnPageWrapper(ColumnPage columnPage) {
    +  private boolean isAdaptiveComplexPrimitivePage;
    +
    +  public ColumnPageWrapper(ColumnPage columnPage, boolean isAdaptiveComplexPrimitivePage) {
         this.columnPage = columnPage;
    +    this.isAdaptiveComplexPrimitivePage = isAdaptiveComplexPrimitivePage;
       }
     
       @Override
       public int fillRawData(int rowId, int offset, byte[] data, KeyStructureInfo restructuringInfo) {
         throw new UnsupportedOperationException("internal error");
       }
     
    -  @Override
    -  public int fillSurrogateKey(int rowId, int chunkIndex, int[] outputSurrogateKey,
    +  @Override public int fillSurrogateKey(int rowId, int chunkIndex, int[] outputSurrogateKey,
    --- End diff --
   
    Done


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r201197596
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java ---
    @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
         throw new UnsupportedOperationException("internal error");
       }
     
    -  @Override
    -  public byte[] getChunkData(int rowId) {
    -    return columnPage.getBytes(rowId);
    +  @Override public byte[] getChunkData(int rowId) {
    --- End diff --
   
    Currently, Struct and Array types are stored as Byte Array. But as they are storing in LV format it can be modified and fit in V format. This refactoring will be done as part of jira CARBONDATA-2605


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r201197871
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java ---
    @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
         throw new UnsupportedOperationException("internal error");
       }
     
    -  @Override
    -  public byte[] getChunkData(int rowId) {
    -    return columnPage.getBytes(rowId);
    +  @Override public byte[] getChunkData(int rowId) {
    +    ColumnType columnType = columnPage.getColumnSpec().getColumnType();
    +    DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
    +    DataType targetDataType = columnPage.getDataType();
    +    if (columnPage.getNullBits().get(rowId)) {
    +      // if this row is null, return default null represent in byte array
    +      return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
    +    }
    +    if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) {
    +      if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) {
    +        double doubleData = columnPage.getDouble(rowId);
    +        if (srcDataType == DataTypes.FLOAT) {
    +          float out = (float) doubleData;
    +          return ByteUtil.toBytes(out);
    +        } else {
    +          return ByteUtil.toBytes(doubleData);
    +        }
    +      } else if (DataTypes.isDecimal(srcDataType)) {
    +        throw new RuntimeException("unsupported type: " + srcDataType);
    --- End diff --
   
    Decimal type is supported inside complex. But currently, it avoids the Adaptive Encoding path and goes as Byte Array Format. This will be modified in CARBONDATA-2605.


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r201198855
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java ---
    @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
         throw new UnsupportedOperationException("internal error");
       }
     
    -  @Override
    -  public byte[] getChunkData(int rowId) {
    -    return columnPage.getBytes(rowId);
    +  @Override public byte[] getChunkData(int rowId) {
    +    ColumnType columnType = columnPage.getColumnSpec().getColumnType();
    +    DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
    +    DataType targetDataType = columnPage.getDataType();
    +    if (columnPage.getNullBits().get(rowId)) {
    +      // if this row is null, return default null represent in byte array
    +      return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
    +    }
    +    if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) {
    +      if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) {
    +        double doubleData = columnPage.getDouble(rowId);
    +        if (srcDataType == DataTypes.FLOAT) {
    +          float out = (float) doubleData;
    +          return ByteUtil.toBytes(out);
    +        } else {
    +          return ByteUtil.toBytes(doubleData);
    +        }
    +      } else if (DataTypes.isDecimal(srcDataType)) {
    +        throw new RuntimeException("unsupported type: " + srcDataType);
    +      } else if ((srcDataType == DataTypes.BYTE) ||
    +          (srcDataType == DataTypes.BOOLEAN) ||
    +          (srcDataType == DataTypes.SHORT) ||
    +          (srcDataType == DataTypes.SHORT_INT) ||
    +          (srcDataType == DataTypes.INT) ||
    +          (srcDataType == DataTypes.LONG) ||
    +          (srcDataType == DataTypes.TIMESTAMP)) {
    +        long longData = columnPage.getLong(rowId);
    +        if ((srcDataType == DataTypes.BYTE)) {
    +          byte out = (byte) longData;
    --- End diff --
   
    Placing a switch statement is not a small change and will impact many other files and classes. As this is not a functional issue postposing this to CARBONDATA-2713.


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r201199734
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java ---
    @@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
     
       public static EncodedColumnPage encodedColumn(ColumnPage page)
           throws IOException, MemoryException {
    -    ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
    -    return encoder.encode(page);
    +    ColumnPageEncoder pageEncoder = createCodecForDimension(page);
    +    if (pageEncoder == null) {
    +      ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
    +      return encoder.encode(page);
    +    } else {
    +      LOGGER.info("Encoder result ---> Source data type: " + pageEncoder.getEncoderMeta(page)
    --- End diff --
   
    Done


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r201199788
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java ---
    @@ -141,7 +141,7 @@ private static ColumnPage createFixLengthByteArrayPage(TableSpec.ColumnSpec colu
             throw new RuntimeException(e);
           }
         } else {
    -      return new SafeFixLengthColumnPage(columnSpec, dataType, pageSize, eachValueSize);
    +      return new SafeFixLengthColumnPage(columnSpec, dataType, pageSize);
    --- End diff --
   
    Done


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2417#discussion_r201221334
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java ---
    @@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
         throw new UnsupportedOperationException("internal error");
       }
     
    -  @Override
    -  public byte[] getChunkData(int rowId) {
    -    return columnPage.getBytes(rowId);
    +  @Override public byte[] getChunkData(int rowId) {
    +    ColumnType columnType = columnPage.getColumnSpec().getColumnType();
    +    DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
    +    DataType targetDataType = columnPage.getDataType();
    +    if (columnPage.getNullBits().get(rowId)) {
    +      // if this row is null, return default null represent in byte array
    +      return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
    +    }
    +    if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) {
    +      if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) {
    +        double doubleData = columnPage.getDouble(rowId);
    +        if (srcDataType == DataTypes.FLOAT) {
    +          float out = (float) doubleData;
    +          return ByteUtil.toBytes(out);
    +        } else {
    +          return ByteUtil.toBytes(doubleData);
    +        }
    +      } else if (DataTypes.isDecimal(srcDataType)) {
    +        throw new RuntimeException("unsupported type: " + srcDataType);
    +      } else if ((srcDataType == DataTypes.BYTE) ||
    +          (srcDataType == DataTypes.BOOLEAN) ||
    +          (srcDataType == DataTypes.SHORT) ||
    +          (srcDataType == DataTypes.SHORT_INT) ||
    +          (srcDataType == DataTypes.INT) ||
    +          (srcDataType == DataTypes.LONG) ||
    +          (srcDataType == DataTypes.TIMESTAMP)) {
    +        long longData = columnPage.getLong(rowId);
    +        if ((srcDataType == DataTypes.BYTE)) {
    +          byte out = (byte) longData;
    +          return ByteUtil.toBytes(out);
    +        } else if (srcDataType == DataTypes.BOOLEAN) {
    +          byte out = (byte) longData;
    +          return ByteUtil.toBytes(ByteUtil.toBoolean(out));
    +        } else if (srcDataType == DataTypes.SHORT) {
    +          short out = (short) longData;
    +          return ByteUtil.toBytes(out);
    +        } else if (srcDataType == DataTypes.SHORT_INT) {
    +          int out = (int) longData;
    +          return ByteUtil.toBytes(out);
    +        } else if (srcDataType == DataTypes.INT) {
    +          int out = (int) longData;
    +          return ByteUtil.toBytes(out);
    +        } else {
    +          // timestamp and long
    +          return ByteUtil.toBytes(longData);
    +        }
    +      } else if ((targetDataType == DataTypes.STRING) ||
    +          (targetDataType == DataTypes.VARCHAR) ||
    +          (targetDataType == DataTypes.BYTE_ARRAY)) {
    +        return columnPage.getBytes(rowId);
    +      } else {
    +        throw new RuntimeException("unsupported type: " + targetDataType);
    +      }
    +    } else if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && !this.isAdaptiveComplexPrimitive()) {
    +      if ((srcDataType == DataTypes.BYTE) || (srcDataType == DataTypes.BOOLEAN)) {
    +        byte[] out = new byte[1];
    +        out[0] = (columnPage.getByte(rowId));
    +        return out;
    +      } else if (srcDataType == DataTypes.BYTE_ARRAY) {
    +        return columnPage.getBytes(rowId);
    +      } else {
    +        throw new RuntimeException("unsupported type: " + targetDataType);
    +      }
    +    } else {
    +      return columnPage.getBytes(rowId);
    --- End diff --
   
    This getBytes is only for Complex STRUCT and ARRAY type which are in Byte Array Format. Later when CARBONDATA-2713 turns STRUCT and ARRAY to V-format then this check will throw exception


---
1 ... 5678910