[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

classic Classic list List threaded Threaded
67 messages Options
1234
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2412: [CARBONDATA-2656] Presto vector stream readers perfo...

qiuchenjian-2
Github user sv71294 commented on the issue:

    https://github.com/apache/carbondata/pull/2412
 
    retest this please


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2412: [CARBONDATA-2656] Presto vector stream readers perfo...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2412
 
    Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6594/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2412: [CARBONDATA-2656] Presto vector stream readers perfo...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2412
 
    Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5417/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2412: [CARBONDATA-2656] Presto vector stream readers perfo...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2412
 
    Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5427/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199063837
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbonVectorBatch.java ---
    @@ -20,50 +20,81 @@
     import java.util.HashSet;
     import java.util.Set;
     
    +import org.apache.carbondata.core.cache.dictionary.Dictionary;
    +import org.apache.carbondata.core.metadata.datatype.DataType;
    +import org.apache.carbondata.core.metadata.datatype.DataTypes;
    +import org.apache.carbondata.core.metadata.datatype.DecimalType;
     import org.apache.carbondata.core.metadata.datatype.StructField;
     import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl;
    +import org.apache.carbondata.presto.readers.BooleanStreamReader;
    +import org.apache.carbondata.presto.readers.DecimalSliceStreamReader;
    +import org.apache.carbondata.presto.readers.DoubleStreamReader;
    +import org.apache.carbondata.presto.readers.IntegerStreamReader;
    +import org.apache.carbondata.presto.readers.LongStreamReader;
    +import org.apache.carbondata.presto.readers.ObjectStreamReader;
    +import org.apache.carbondata.presto.readers.ShortStreamReader;
    +import org.apache.carbondata.presto.readers.SliceStreamReader;
    +import org.apache.carbondata.presto.readers.TimestampStreamReader;
    +
    +import com.facebook.presto.spi.block.SliceArrayBlock;
     
     public class CarbonVectorBatch {
     
    -  private static final int DEFAULT_BATCH_SIZE =  4 * 1024;
    +  private static final int DEFAULT_BATCH_SIZE = 4 * 1024;
     
    -  private final StructField[] schema;
       private final int capacity;
    -  private int numRows;
       private final CarbonColumnVectorImpl[] columns;
    -
       // True if the row is filtered.
       private final boolean[] filteredRows;
    -
       // Column indices that cannot have null values.
       private final Set<Integer> nullFilteredColumns;
    -
    +  private int numRows;
       // Total number of rows that have been filtered.
       private int numRowsFiltered = 0;
     
    -
    -  private CarbonVectorBatch(StructField[] schema, int maxRows) {
    -    this.schema = schema;
    +  private CarbonVectorBatch(StructField[] schema, CarbonDictionaryDecodeReadSupport readSupport,
    --- End diff --
   
    added the dictionary read support for the stream readers


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199066373
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbonVectorBatch.java ---
    @@ -20,50 +20,81 @@
     import java.util.HashSet;
     import java.util.Set;
     
    +import org.apache.carbondata.core.cache.dictionary.Dictionary;
    +import org.apache.carbondata.core.metadata.datatype.DataType;
    +import org.apache.carbondata.core.metadata.datatype.DataTypes;
    +import org.apache.carbondata.core.metadata.datatype.DecimalType;
     import org.apache.carbondata.core.metadata.datatype.StructField;
     import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl;
    +import org.apache.carbondata.presto.readers.BooleanStreamReader;
    +import org.apache.carbondata.presto.readers.DecimalSliceStreamReader;
    +import org.apache.carbondata.presto.readers.DoubleStreamReader;
    +import org.apache.carbondata.presto.readers.IntegerStreamReader;
    +import org.apache.carbondata.presto.readers.LongStreamReader;
    +import org.apache.carbondata.presto.readers.ObjectStreamReader;
    +import org.apache.carbondata.presto.readers.ShortStreamReader;
    +import org.apache.carbondata.presto.readers.SliceStreamReader;
    +import org.apache.carbondata.presto.readers.TimestampStreamReader;
    +
    +import com.facebook.presto.spi.block.SliceArrayBlock;
     
     public class CarbonVectorBatch {
     
    -  private static final int DEFAULT_BATCH_SIZE =  4 * 1024;
    +  private static final int DEFAULT_BATCH_SIZE = 4 * 1024;
     
    -  private final StructField[] schema;
    --- End diff --
   
    no need to have this at class level; I have moved it into the constructor


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199066588
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbonVectorBatch.java ---
    @@ -20,50 +20,81 @@
     import java.util.HashSet;
     import java.util.Set;
     
    +import org.apache.carbondata.core.cache.dictionary.Dictionary;
    +import org.apache.carbondata.core.metadata.datatype.DataType;
    +import org.apache.carbondata.core.metadata.datatype.DataTypes;
    +import org.apache.carbondata.core.metadata.datatype.DecimalType;
     import org.apache.carbondata.core.metadata.datatype.StructField;
     import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl;
    +import org.apache.carbondata.presto.readers.BooleanStreamReader;
    +import org.apache.carbondata.presto.readers.DecimalSliceStreamReader;
    +import org.apache.carbondata.presto.readers.DoubleStreamReader;
    +import org.apache.carbondata.presto.readers.IntegerStreamReader;
    +import org.apache.carbondata.presto.readers.LongStreamReader;
    +import org.apache.carbondata.presto.readers.ObjectStreamReader;
    +import org.apache.carbondata.presto.readers.ShortStreamReader;
    +import org.apache.carbondata.presto.readers.SliceStreamReader;
    +import org.apache.carbondata.presto.readers.TimestampStreamReader;
    +
    +import com.facebook.presto.spi.block.SliceArrayBlock;
     
     public class CarbonVectorBatch {
     
    -  private static final int DEFAULT_BATCH_SIZE =  4 * 1024;
    +  private static final int DEFAULT_BATCH_SIZE = 4 * 1024;
     
    -  private final StructField[] schema;
       private final int capacity;
    -  private int numRows;
       private final CarbonColumnVectorImpl[] columns;
    -
       // True if the row is filtered.
       private final boolean[] filteredRows;
    -
       // Column indices that cannot have null values.
       private final Set<Integer> nullFilteredColumns;
    -
    +  private int numRows;
       // Total number of rows that have been filtered.
       private int numRowsFiltered = 0;
     
    -
    -  private CarbonVectorBatch(StructField[] schema, int maxRows) {
    -    this.schema = schema;
    +  private CarbonVectorBatch(StructField[] schema, CarbonDictionaryDecodeReadSupport readSupport,
    +      int maxRows) {
         this.capacity = maxRows;
         this.columns = new CarbonColumnVectorImpl[schema.length];
         this.nullFilteredColumns = new HashSet<>();
         this.filteredRows = new boolean[maxRows];
    +    Dictionary[] dictionaries = readSupport.getDictionaries();
    +    DataType[] dataTypes = readSupport.getDataTypes();
     
         for (int i = 0; i < schema.length; ++i) {
    -      StructField field = schema[i];
    -      columns[i] = new CarbonColumnVectorImpl(maxRows, field.getDataType());
    +      columns[i] = createDirectStreamReader(maxRows, dataTypes[i], schema[i], dictionaries[i],
    +          readSupport.getSliceArrayBlock(i));
         }
    -
       }
     
    -
    -  public static CarbonVectorBatch allocate(StructField[] schema) {
    -    return new CarbonVectorBatch(schema, DEFAULT_BATCH_SIZE);
    +  public static CarbonVectorBatch allocate(StructField[] schema,
    --- End diff --
   
    added the dictionary read support for the stream readers


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199066819
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbonVectorBatch.java ---
    @@ -20,50 +20,81 @@
     import java.util.HashSet;
     import java.util.Set;
     
    +import org.apache.carbondata.core.cache.dictionary.Dictionary;
    +import org.apache.carbondata.core.metadata.datatype.DataType;
    +import org.apache.carbondata.core.metadata.datatype.DataTypes;
    +import org.apache.carbondata.core.metadata.datatype.DecimalType;
     import org.apache.carbondata.core.metadata.datatype.StructField;
     import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl;
    +import org.apache.carbondata.presto.readers.BooleanStreamReader;
    +import org.apache.carbondata.presto.readers.DecimalSliceStreamReader;
    +import org.apache.carbondata.presto.readers.DoubleStreamReader;
    +import org.apache.carbondata.presto.readers.IntegerStreamReader;
    +import org.apache.carbondata.presto.readers.LongStreamReader;
    +import org.apache.carbondata.presto.readers.ObjectStreamReader;
    +import org.apache.carbondata.presto.readers.ShortStreamReader;
    +import org.apache.carbondata.presto.readers.SliceStreamReader;
    +import org.apache.carbondata.presto.readers.TimestampStreamReader;
    +
    +import com.facebook.presto.spi.block.SliceArrayBlock;
     
     public class CarbonVectorBatch {
     
    -  private static final int DEFAULT_BATCH_SIZE =  4 * 1024;
    +  private static final int DEFAULT_BATCH_SIZE = 4 * 1024;
     
    -  private final StructField[] schema;
       private final int capacity;
    -  private int numRows;
       private final CarbonColumnVectorImpl[] columns;
    -
       // True if the row is filtered.
       private final boolean[] filteredRows;
    -
       // Column indices that cannot have null values.
       private final Set<Integer> nullFilteredColumns;
    -
    +  private int numRows;
       // Total number of rows that have been filtered.
       private int numRowsFiltered = 0;
     
    -
    -  private CarbonVectorBatch(StructField[] schema, int maxRows) {
    -    this.schema = schema;
    +  private CarbonVectorBatch(StructField[] schema, CarbonDictionaryDecodeReadSupport readSupport,
    +      int maxRows) {
         this.capacity = maxRows;
         this.columns = new CarbonColumnVectorImpl[schema.length];
         this.nullFilteredColumns = new HashSet<>();
         this.filteredRows = new boolean[maxRows];
    +    Dictionary[] dictionaries = readSupport.getDictionaries();
    +    DataType[] dataTypes = readSupport.getDataTypes();
     
         for (int i = 0; i < schema.length; ++i) {
    -      StructField field = schema[i];
    -      columns[i] = new CarbonColumnVectorImpl(maxRows, field.getDataType());
    +      columns[i] = createDirectStreamReader(maxRows, dataTypes[i], schema[i], dictionaries[i],
    +          readSupport.getSliceArrayBlock(i));
         }
    -
       }
     
    -
    -  public static CarbonVectorBatch allocate(StructField[] schema) {
    -    return new CarbonVectorBatch(schema, DEFAULT_BATCH_SIZE);
    +  public static CarbonVectorBatch allocate(StructField[] schema,
    +      CarbonDictionaryDecodeReadSupport readSupport) {
    +    return new CarbonVectorBatch(schema, readSupport, DEFAULT_BATCH_SIZE);
       }
     
    -  public static CarbonVectorBatch allocate(StructField[] schema,  int maxRows) {
    -    return new CarbonVectorBatch(schema, maxRows);
    +  private CarbonColumnVectorImpl createDirectStreamReader(int batchSize, DataType dataType,
    --- End diff --
   
    added the method to create the updated streamReaders (which extend CarbonColumnVectorImpl) instead of plain CarbonColumnVectorImpl instances


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199067368
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataPageSource.java ---
    @@ -47,25 +44,18 @@
     
       private static final LogService logger =
           LogServiceFactory.getLogService(CarbondataPageSource.class.getName());
    -  private final List<Type> types;
    +  private List<ColumnHandle> columnHandles;
       private boolean closed;
       private PrestoCarbonVectorizedRecordReader vectorReader;
    -  private CarbonDictionaryDecodeReadSupport<Object[]> readSupport;
    -  List<ColumnHandle> columnHandles;
       private long sizeOfData = 0;
    -  private final StreamReader[] readers ;
       private int batchId;
       private long nanoStart;
       private long nanoEnd;
     
    -  public CarbondataPageSource(CarbonDictionaryDecodeReadSupport readSupport,
    -      PrestoCarbonVectorizedRecordReader vectorizedRecordReader,
    -      List<ColumnHandle> columnHandles ) {
    +  CarbondataPageSource(PrestoCarbonVectorizedRecordReader vectorizedRecordReader,
    --- End diff --
   
    no need to create the streamReaders and dictionary read support here, as they have been moved to CarbonVectorBatch


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199067892
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataPageSource.java ---
    @@ -108,22 +96,16 @@ public CarbondataPageSource(CarbonDictionaryDecodeReadSupport readSupport,
             return null;
           }
     
    -      Block[] blocks = new Block[types.size()];
    +      Block[] blocks = new Block[columnHandles.size()];
           for (int column = 0; column < blocks.length; column++) {
    -        Type type = types.get(column);
    --- End diff --
   
    readers are removed from this class, handled in CarbonVectorBatch


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199068020
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataPageSource.java ---
    @@ -166,61 +146,32 @@ protected void closeWithSuppression(Throwable throwable)
       /**
        * Lazy Block Implementation for the Carbondata
        */
    -  private final class CarbondataBlockLoader
    -      implements LazyBlockLoader<LazyBlock>
    -  {
    +  private final class CarbondataBlockLoader implements LazyBlockLoader<LazyBlock> {
         private final int expectedBatchId = batchId;
         private final int columnIndex;
    -    private final Type type;
         private boolean loaded;
     
    -    public CarbondataBlockLoader(int columnIndex, Type type)
    -    {
    +    CarbondataBlockLoader(int columnIndex) {
           this.columnIndex = columnIndex;
    -      this.type = requireNonNull(type, "type is null");
         }
     
    -    @Override
    -    public final void load(LazyBlock lazyBlock)
    -    {
    +    @Override public final void load(LazyBlock lazyBlock) {
           if (loaded) {
             return;
           }
           checkState(batchId == expectedBatchId);
           try {
    -        Block block = readers[columnIndex].readBlock(type);
    +        PrestoVectorBlockBuilder blockBuilder =
    +            (PrestoVectorBlockBuilder) vectorReader.getColumnarBatch().column(columnIndex);
    +        blockBuilder.setBatchSize(lazyBlock.getPositionCount());
    +        Block block = blockBuilder.buildBlock();
             sizeOfData += block.getSizeInBytes();
             lazyBlock.setBlock(block);
    -      }
    -      catch (IOException e) {
    +      } catch (Exception e) {
             throw new CarbonDataLoadingException("Error in Reading Data from Carbondata ", e);
           }
           loaded = true;
         }
    -
       }
     
    -
    -  /**
    -   * Create the Stream Reader for every column based on their type
    -   * This method will be initialized only once based on the types.
    -   *
    -   * @return
    -   */
    -  private StreamReader[] createStreamReaders( ) {
    --- End diff --
   
    removed from this class and moved to CarbonVectorBatch


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199068264
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataPageSource.java ---
    @@ -166,61 +146,32 @@ protected void closeWithSuppression(Throwable throwable)
       /**
        * Lazy Block Implementation for the Carbondata
        */
    -  private final class CarbondataBlockLoader
    -      implements LazyBlockLoader<LazyBlock>
    -  {
    +  private final class CarbondataBlockLoader implements LazyBlockLoader<LazyBlock> {
         private final int expectedBatchId = batchId;
         private final int columnIndex;
    -    private final Type type;
         private boolean loaded;
     
    -    public CarbondataBlockLoader(int columnIndex, Type type)
    -    {
    +    CarbondataBlockLoader(int columnIndex) {
           this.columnIndex = columnIndex;
    -      this.type = requireNonNull(type, "type is null");
         }
     
    -    @Override
    -    public final void load(LazyBlock lazyBlock)
    -    {
    +    @Override public final void load(LazyBlock lazyBlock) {
           if (loaded) {
             return;
           }
           checkState(batchId == expectedBatchId);
           try {
    -        Block block = readers[columnIndex].readBlock(type);
    +        PrestoVectorBlockBuilder blockBuilder =
    --- End diff --
   
    getting the presto block from the updated streamReaders, which extend CarbonColumnVectorImpl and implement PrestoVectorBlockBuilder


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199068497
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataPageSourceProvider.java ---
    @@ -103,7 +102,7 @@ private PrestoCarbonVectorizedRecordReader createReader(ConnectorSplit split,
           CarbonIterator iterator = queryExecutor.execute(queryModel);
           readSupport.initialize(queryModel.getProjectionColumns(), queryModel.getTable());
           PrestoCarbonVectorizedRecordReader reader = new PrestoCarbonVectorizedRecordReader(queryExecutor, queryModel,
    -          (AbstractDetailQueryResultIterator) iterator);
    +          (AbstractDetailQueryResultIterator) iterator, readSupport);
    --- End diff --
   
    added dictionary read support for streamReaders


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199068654
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/PrestoCarbonVectorizedRecordReader.java ---
    @@ -80,11 +80,14 @@
     
       private long queryStartTime;
     
    +  private CarbonDictionaryDecodeReadSupport readSupport;
    +
       public PrestoCarbonVectorizedRecordReader(QueryExecutor queryExecutor, QueryModel queryModel,
    -      AbstractDetailQueryResultIterator iterator) {
    +      AbstractDetailQueryResultIterator iterator, CarbonDictionaryDecodeReadSupport readSupport) {
    --- End diff --
   
    added dictionary read support for streamReaders


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199068669
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/PrestoCarbonVectorizedRecordReader.java ---
    @@ -212,7 +213,7 @@ private void initBatch() {
           }
         }
     
    -    columnarBatch = CarbonVectorBatch.allocate(fields);
    +    columnarBatch = CarbonVectorBatch.allocate(fields, readSupport);
    --- End diff --
   
    added dictionary read support for streamReaders


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199068748
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/PrestoCarbonVectorizedRecordReader.java ---
    @@ -251,6 +251,9 @@ private boolean nextBatch() {
         return false;
       }
     
    +  public CarbonVectorBatch getColumnarBatch() {
    --- End diff --
   
    getter function for the columnar batch


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199069051
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/readers/AbstractStreamReader.java ---
    @@ -1,66 +0,0 @@
    -/*
    --- End diff --
   
    no need for this class now: we have updated the interface and the streamReaders now extend CarbonColumnVectorImpl, so all functionality is captured within the readers


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199069311
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/readers/BooleanStreamReader.java ---
    @@ -17,91 +17,64 @@
     
     package org.apache.carbondata.presto.readers;
     
    -import java.io.IOException;
    -
     import org.apache.carbondata.core.cache.dictionary.Dictionary;
    +import org.apache.carbondata.core.metadata.datatype.DataType;
     import org.apache.carbondata.core.metadata.datatype.DataTypes;
    +import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl;
     import org.apache.carbondata.core.util.DataTypeUtil;
     
     import com.facebook.presto.spi.block.Block;
     import com.facebook.presto.spi.block.BlockBuilder;
     import com.facebook.presto.spi.block.BlockBuilderStatus;
    +import com.facebook.presto.spi.type.BooleanType;
     import com.facebook.presto.spi.type.Type;
     
    -public class BooleanStreamReader extends AbstractStreamReader {
    +public class BooleanStreamReader extends CarbonColumnVectorImpl
    +    implements PrestoVectorBlockBuilder {
     
    -  private boolean isDictionary;
    -  private Dictionary dictionary;
    +  protected int batchSize;
     
    -  public BooleanStreamReader() {
    +  protected Type type = BooleanType.BOOLEAN;
    --- End diff --
   
    added the type for the block builder


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199069988
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/readers/BooleanStreamReader.java ---
    @@ -17,91 +17,64 @@
     
     package org.apache.carbondata.presto.readers;
     
    -import java.io.IOException;
    -
     import org.apache.carbondata.core.cache.dictionary.Dictionary;
    +import org.apache.carbondata.core.metadata.datatype.DataType;
     import org.apache.carbondata.core.metadata.datatype.DataTypes;
    +import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl;
     import org.apache.carbondata.core.util.DataTypeUtil;
     
     import com.facebook.presto.spi.block.Block;
     import com.facebook.presto.spi.block.BlockBuilder;
     import com.facebook.presto.spi.block.BlockBuilderStatus;
    +import com.facebook.presto.spi.type.BooleanType;
     import com.facebook.presto.spi.type.Type;
     
    -public class BooleanStreamReader extends AbstractStreamReader {
    +public class BooleanStreamReader extends CarbonColumnVectorImpl
    +    implements PrestoVectorBlockBuilder {
     
    -  private boolean isDictionary;
    -  private Dictionary dictionary;
    +  protected int batchSize;
     
    -  public BooleanStreamReader() {
    +  protected Type type = BooleanType.BOOLEAN;
     
    -  }
    +  protected BlockBuilder builder;
     
    -  public BooleanStreamReader(boolean isDictionary, Dictionary dictionary) {
    -    this.isDictionary = isDictionary;
    +  private Dictionary dictionary;
    +
    +  public BooleanStreamReader(int batchSize, DataType dataType, Dictionary dictionary) {
    +    super(batchSize, dataType);
    +    this.batchSize = batchSize;
    +    this.builder = type.createBlockBuilder(new BlockBuilderStatus(), batchSize);
         this.dictionary = dictionary;
       }
     
    -  public Block readBlock(Type type) throws IOException {
    -    int numberOfRows = 0;
    -    BlockBuilder builder = null;
    -    if (isVectorReader) {
    -      numberOfRows = batchSize;
    -      builder = type.createBlockBuilder(new BlockBuilderStatus(), numberOfRows);
    -      if (columnVector != null) {
    -        if (isDictionary) {
    -          populateDictionaryVector(type, numberOfRows, builder);
    -        } else {
    -          if (columnVector.anyNullsSet()) {
    -            handleNullInVector(type, numberOfRows, builder);
    -          } else {
    -            populateVector(type, numberOfRows, builder);
    -          }
    -        }
    -      }
    -    } else {
    -      numberOfRows = streamData.length;
    -      builder = type.createBlockBuilder(new BlockBuilderStatus(), numberOfRows);
    -      for (int i = 0; i < numberOfRows; i++) {
    -        type.writeBoolean(builder, byteToBoolean(streamData[i]));
    -      }
    -    }
    -
    +  @Override public Block buildBlock() {
         return builder.build();
       }
     
    -  private void handleNullInVector(Type type, int numberOfRows, BlockBuilder builder) {
    -    for (int i = 0; i < numberOfRows; i++) {
    -      if (columnVector.isNullAt(i)) {
    -        builder.appendNull();
    -      } else {
    -        type.writeBoolean(builder, byteToBoolean(columnVector.getData(i)));
    -      }
    +  @Override public void setBatchSize(int batchSize) {
    +    this.batchSize = batchSize;
    +  }
    +
    +  @Override public void putInt(int rowId, int value) {
    --- End diff --
   
    when the column is dictionary-encoded, an int (the dictionary key) is received and the actual value is looked up in the dictionary


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2412: [CARBONDATA-2656] Presto vector stream reader...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user sv71294 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2412#discussion_r199070166
 
    --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/readers/BooleanStreamReader.java ---
    @@ -17,91 +17,64 @@
     
     package org.apache.carbondata.presto.readers;
     
    -import java.io.IOException;
    -
     import org.apache.carbondata.core.cache.dictionary.Dictionary;
    +import org.apache.carbondata.core.metadata.datatype.DataType;
     import org.apache.carbondata.core.metadata.datatype.DataTypes;
    +import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl;
     import org.apache.carbondata.core.util.DataTypeUtil;
     
     import com.facebook.presto.spi.block.Block;
     import com.facebook.presto.spi.block.BlockBuilder;
     import com.facebook.presto.spi.block.BlockBuilderStatus;
    +import com.facebook.presto.spi.type.BooleanType;
     import com.facebook.presto.spi.type.Type;
     
    -public class BooleanStreamReader extends AbstractStreamReader {
    +public class BooleanStreamReader extends CarbonColumnVectorImpl
    +    implements PrestoVectorBlockBuilder {
     
    -  private boolean isDictionary;
    -  private Dictionary dictionary;
    +  protected int batchSize;
     
    -  public BooleanStreamReader() {
    +  protected Type type = BooleanType.BOOLEAN;
     
    -  }
    +  protected BlockBuilder builder;
     
    -  public BooleanStreamReader(boolean isDictionary, Dictionary dictionary) {
    -    this.isDictionary = isDictionary;
    +  private Dictionary dictionary;
    +
    +  public BooleanStreamReader(int batchSize, DataType dataType, Dictionary dictionary) {
    +    super(batchSize, dataType);
    +    this.batchSize = batchSize;
    +    this.builder = type.createBlockBuilder(new BlockBuilderStatus(), batchSize);
         this.dictionary = dictionary;
       }
     
    -  public Block readBlock(Type type) throws IOException {
    -    int numberOfRows = 0;
    -    BlockBuilder builder = null;
    -    if (isVectorReader) {
    -      numberOfRows = batchSize;
    -      builder = type.createBlockBuilder(new BlockBuilderStatus(), numberOfRows);
    -      if (columnVector != null) {
    -        if (isDictionary) {
    -          populateDictionaryVector(type, numberOfRows, builder);
    -        } else {
    -          if (columnVector.anyNullsSet()) {
    -            handleNullInVector(type, numberOfRows, builder);
    -          } else {
    -            populateVector(type, numberOfRows, builder);
    -          }
    -        }
    -      }
    -    } else {
    -      numberOfRows = streamData.length;
    -      builder = type.createBlockBuilder(new BlockBuilderStatus(), numberOfRows);
    -      for (int i = 0; i < numberOfRows; i++) {
    -        type.writeBoolean(builder, byteToBoolean(streamData[i]));
    -      }
    -    }
    -
    +  @Override public Block buildBlock() {
         return builder.build();
       }
     
    -  private void handleNullInVector(Type type, int numberOfRows, BlockBuilder builder) {
    -    for (int i = 0; i < numberOfRows; i++) {
    -      if (columnVector.isNullAt(i)) {
    -        builder.appendNull();
    -      } else {
    -        type.writeBoolean(builder, byteToBoolean(columnVector.getData(i)));
    -      }
    +  @Override public void setBatchSize(int batchSize) {
    +    this.batchSize = batchSize;
    +  }
    +
    +  @Override public void putInt(int rowId, int value) {
    +    Object data = DataTypeUtil
    +        .getDataBasedOnDataType(dictionary.getDictionaryValueForKey(value), DataTypes.BOOLEAN);
    +    if (data != null) {
    +      type.writeBoolean(builder, (boolean) data);
    +    } else {
    +      builder.appendNull();
         }
       }
     
    -  private void populateVector(Type type, int numberOfRows, BlockBuilder builder) {
    -      for (int i = 0; i < numberOfRows; i++) {
    -        type.writeBoolean(builder, byteToBoolean(columnVector.getData(i)));
    -      }
    +  @Override public void putBoolean(int rowId, boolean value) {
    --- End diff --
   
    here we are directly writing the value to the builder instead of filling into the vector array


---
1234