[GitHub] carbondata pull request #2644: [WIP][CARBONDATA-2853] Implement file-level m...


[GitHub] carbondata issue #2644: [CARBONDATA-2853] Implement file-level min/max index...

qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2644
 
    Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8193/



---

[GitHub] carbondata issue #2644: [CARBONDATA-2853] Implement file-level min/max index...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2644
 
    Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/122/



---

[GitHub] carbondata pull request #2644: [CARBONDATA-2853] Implement file-level min/ma...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2644#discussion_r214311953
 
    --- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java ---
    @@ -342,60 +341,52 @@ public void refreshSegmentCacheIfRequired(JobContext job, CarbonTable carbonTabl
       /**
        * use file list in .carbonindex file to get the split of streaming.
        */
    -  public List<InputSplit> getSplitsOfStreaming(JobContext job, AbsoluteTableIdentifier identifier,
    -      List<Segment> streamSegments) throws IOException {
    +  public List<InputSplit> getSplitsOfStreaming(JobContext job, List<Segment> streamSegments,
    +      CarbonTable carbonTable, FilterResolverIntf filterResolverIntf) throws IOException {
         List<InputSplit> splits = new ArrayList<InputSplit>();
         if (streamSegments != null && !streamSegments.isEmpty()) {
           numStreamSegments = streamSegments.size();
           long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
           long maxSize = getMaxSplitSize(job);
    -      for (Segment segment : streamSegments) {
    -        String segmentDir =
    -            CarbonTablePath.getSegmentPath(identifier.getTablePath(), segment.getSegmentNo());
    -        FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
    -        if (FileFactory.isFileExist(segmentDir, fileType)) {
    -          SegmentIndexFileStore segmentIndexFileStore = new SegmentIndexFileStore();
    -          segmentIndexFileStore.readAllIIndexOfSegment(segmentDir);
    -          Map<String, byte[]> carbonIndexMap = segmentIndexFileStore.getCarbonIndexMap();
    -          CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
    -          for (byte[] fileData : carbonIndexMap.values()) {
    -            indexReader.openThriftReader(fileData);
    -            try {
    -              // map block index
    -              while (indexReader.hasNext()) {
    -                BlockIndex blockIndex = indexReader.readBlockIndexInfo();
    -                String filePath = segmentDir + File.separator + blockIndex.getFile_name();
    -                Path path = new Path(filePath);
    -                long length = blockIndex.getFile_size();
    -                if (length != 0) {
    -                  BlockLocation[] blkLocations;
    -                  FileSystem fs = FileFactory.getFileSystem(path);
    -                  FileStatus file = fs.getFileStatus(path);
    -                  blkLocations = fs.getFileBlockLocations(path, 0, length);
    -                  long blockSize = file.getBlockSize();
    -                  long splitSize = computeSplitSize(blockSize, minSize, maxSize);
    -                  long bytesRemaining = length;
    -                  while (((double) bytesRemaining) / splitSize > 1.1) {
    -                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
    -                    splits.add(makeSplit(segment.getSegmentNo(), path, length - bytesRemaining,
    -                        splitSize, blkLocations[blkIndex].getHosts(),
    -                        blkLocations[blkIndex].getCachedHosts(), FileFormat.ROW_V1));
    -                    bytesRemaining -= splitSize;
    -                  }
    -                  if (bytesRemaining != 0) {
    -                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
    -                    splits.add(makeSplit(segment.getSegmentNo(), path, length - bytesRemaining,
    -                        bytesRemaining, blkLocations[blkIndex].getHosts(),
    -                        blkLocations[blkIndex].getCachedHosts(), FileFormat.ROW_V1));
    -                  }
    -                } else {
    -                  //Create empty hosts array for zero length files
    -                  splits.add(makeSplit(segment.getSegmentNo(), path, 0, length, new String[0],
    -                      FileFormat.ROW_V1));
    -                }
    -              }
    -            } finally {
    -              indexReader.closeThriftReader();
    +
    +      if (filterResolverIntf == null) {
    +        if (carbonTable != null) {
    +          Expression filter = getFilterPredicates(job.getConfiguration());
    +          if (filter != null) {
    +            carbonTable.processFilterExpression(filter, null, null);
    +            filterResolverIntf = carbonTable.resolveFilter(filter);
    +          }
    +        }
    +      }
    +      StreamDataMap streamDataMap =
    +          DataMapStoreManager.getInstance().getStreamDataMap(carbonTable);
    +      streamDataMap.init(filterResolverIntf);
    +      List<StreamFile> streamFiles = streamDataMap.prune(streamSegments);
    +      for (StreamFile streamFile : streamFiles) {
    +        if (FileFactory.isFileExist(streamFile.getFilePath())) {
    +          Path path = new Path(streamFile.getFilePath());
    +          long length = streamFile.getFileSize();
    +          if (length != 0) {
    +            BlockLocation[] blkLocations;
    +            FileSystem fs = FileFactory.getFileSystem(path);
    +            FileStatus file = fs.getFileStatus(path);
    +            blkLocations = fs.getFileBlockLocations(path, 0, length);
    +            long blockSize = file.getBlockSize();
    +            long splitSize = computeSplitSize(blockSize, minSize, maxSize);
    +            long bytesRemaining = length;
    +            while (((double) bytesRemaining) / splitSize > 1.1) {
    --- End diff --
   
    Please add a comment here to clearly explain the logic behind using 1.1 and the split size computation.
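
    For reference, here is a minimal, self-contained sketch of the 1.1 ("split slop") convention under discussion. It is plain Java mirroring Hadoop FileInputFormat's logic, not CarbonData source:

    public class SplitSlopSketch {
      // Hadoop's FileInputFormat breaks off another split only while the
      // remainder exceeds splitSize by more than 10% (SPLIT_SLOP = 1.1),
      // so it never produces a tiny trailing split.
      private static final double SPLIT_SLOP = 1.1;

      // Same formula as FileInputFormat.computeSplitSize: clamp the block
      // size between the configured minimum and maximum split sizes.
      static long computeSplitSize(long blockSize, long minSize, long maxSize) {
        return Math.max(minSize, Math.min(maxSize, blockSize));
      }

      public static void main(String[] args) {
        long splitSize = computeSplitSize(128L << 20, 1L, Long.MAX_VALUE);
        long length = 300L << 20; // hypothetical 300 MB stream file
        long bytesRemaining = length;
        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
          System.out.printf("split: offset=%d length=%d%n",
              length - bytesRemaining, splitSize);
          bytesRemaining -= splitSize;
        }
        if (bytesRemaining != 0) {
          // Tail split: at most 1.1 * splitSize bytes.
          System.out.printf("split: offset=%d length=%d%n",
              length - bytesRemaining, bytesRemaining);
        }
      }
    }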


---

[GitHub] carbondata pull request #2644: [CARBONDATA-2853] Implement file-level min/ma...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2644#discussion_r214311329
 
    --- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java ---
    @@ -342,60 +341,52 @@ public void refreshSegmentCacheIfRequired(JobContext job, CarbonTable carbonTabl
       /**
        * use file list in .carbonindex file to get the split of streaming.
        */
    -  public List<InputSplit> getSplitsOfStreaming(JobContext job, AbsoluteTableIdentifier identifier,
    -      List<Segment> streamSegments) throws IOException {
    +  public List<InputSplit> getSplitsOfStreaming(JobContext job, List<Segment> streamSegments,
    +      CarbonTable carbonTable, FilterResolverIntf filterResolverIntf) throws IOException {
    --- End diff --
   
    You can write an overloaded method for getSplitsOfStreaming: one that accepts 3 parameters and one that accepts 4.
    1. getSplitsOfStreaming(JobContext job, AbsoluteTableIdentifier identifier, List<Segment> streamSegments)
    -- From this method you can call the other method and pass null as the 4th argument. This avoids passing null at all the call sites above. (See the sketch below.)
    2. getSplitsOfStreaming(JobContext job, List<Segment> streamSegments, CarbonTable carbonTable, FilterResolverIntf filterResolverIntf)
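
    A hedged sketch of the suggested overload, with placeholder types standing in for the Hadoop and CarbonData classes (illustrative only, not the actual source):

    import java.io.IOException;
    import java.util.Collections;
    import java.util.List;

    class OverloadSketch {
      // Placeholder types; the real signatures use JobContext, Segment,
      // CarbonTable and FilterResolverIntf.
      static class Job {}
      static class Segment {}
      static class Table {}
      static class Filter {}
      static class Split {}

      // 3-parameter variant: delegates with a null filter, so call sites
      // never have to pass null themselves.
      List<Split> getSplitsOfStreaming(Job job, List<Segment> segments, Table table)
          throws IOException {
        return getSplitsOfStreaming(job, segments, table, null);
      }

      // 4-parameter variant: resolves the filter, prunes, and builds splits.
      List<Split> getSplitsOfStreaming(Job job, List<Segment> segments, Table table,
          Filter filter) throws IOException {
        return Collections.emptyList(); // real logic omitted in this sketch
      }
    }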


---

[GitHub] carbondata pull request #2644: [CARBONDATA-2853] Implement file-level min/ma...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2644#discussion_r214305126
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/StreamDataMap.java ---
    @@ -0,0 +1,162 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datamap;
    +
    +import java.io.File;
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.BitSet;
    +import java.util.List;
    +import java.util.Map;
    +
    +import org.apache.carbondata.common.annotations.InterfaceAudience;
    +import org.apache.carbondata.core.datastore.block.SegmentProperties;
    +import org.apache.carbondata.core.datastore.impl.FileFactory;
    +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;
    +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
    +import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
    +import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
    +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
    +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
    +import org.apache.carbondata.core.reader.CarbonIndexFileReader;
    +import org.apache.carbondata.core.scan.filter.FilterUtil;
    +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter;
    +import org.apache.carbondata.core.scan.filter.executer.ImplicitColumnFilterExecutor;
    +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
    +import org.apache.carbondata.core.util.CarbonMetadataUtil;
    +import org.apache.carbondata.core.util.path.CarbonTablePath;
    +import org.apache.carbondata.format.BlockIndex;
    +
    +@InterfaceAudience.Internal
    +public class StreamDataMap {
    --- End diff --
   
    Please check the feasibility of extending the DataMap interface and implementing all its methods, to keep this class similar to BlockDataMap. I think it should be feasible.
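
    To illustrate the idea, a simplified and hypothetical sketch (the interface below is a stand-in, not the real org.apache.carbondata.core.datamap API); the point is that StreamDataMap and BlockDataMap would then share one pruning contract:

    import java.io.IOException;
    import java.util.Collections;
    import java.util.List;

    // Hypothetical, simplified stand-in for the DataMap contract.
    interface PruningDataMap<T> {
      void init(Object filterResolver) throws IOException;
      List<T> prune(List<String> segmentIds) throws IOException;
      void clear();
    }

    // StreamDataMap implemented against the shared contract, as BlockDataMap is.
    class StreamDataMapSketch implements PruningDataMap<String> {
      @Override public void init(Object filterResolver) { /* build the FilterExecuter */ }
      @Override public List<String> prune(List<String> segmentIds) {
        return Collections.emptyList(); // would return min/max-pruned stream files
      }
      @Override public void clear() { }
    }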


---

[GitHub] carbondata pull request #2644: [CARBONDATA-2853] Implement file-level min/ma...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2644#discussion_r214303472
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/StreamDataMap.java ---
    @@ -0,0 +1,162 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datamap;
    +
    +import java.io.File;
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.BitSet;
    +import java.util.List;
    +import java.util.Map;
    +
    +import org.apache.carbondata.common.annotations.InterfaceAudience;
    +import org.apache.carbondata.core.datastore.block.SegmentProperties;
    +import org.apache.carbondata.core.datastore.impl.FileFactory;
    +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;
    +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
    +import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
    +import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
    +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
    +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
    +import org.apache.carbondata.core.reader.CarbonIndexFileReader;
    +import org.apache.carbondata.core.scan.filter.FilterUtil;
    +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter;
    +import org.apache.carbondata.core.scan.filter.executer.ImplicitColumnFilterExecutor;
    +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
    +import org.apache.carbondata.core.util.CarbonMetadataUtil;
    +import org.apache.carbondata.core.util.path.CarbonTablePath;
    +import org.apache.carbondata.format.BlockIndex;
    +
    +@InterfaceAudience.Internal
    +public class StreamDataMap {
    +
    +  private CarbonTable carbonTable;
    +
    +  private AbsoluteTableIdentifier identifier;
    --- End diff --
   
    If carbonTable is stored, there is no need to also store identifier; you can get it from carbonTable.
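
    In other words (a short sketch using the accessor already visible in this diff; not a compilable unit on its own):

    // Derive the identifier on demand instead of caching it in a field.
    private AbsoluteTableIdentifier identifier() {
      return carbonTable.getAbsoluteTableIdentifier();
    }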


---

[GitHub] carbondata pull request #2644: [CARBONDATA-2853] Implement file-level min/ma...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2644#discussion_r214307411
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/StreamDataMap.java ---
    @@ -0,0 +1,162 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datamap;
    +
    +import java.io.File;
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.BitSet;
    +import java.util.List;
    +import java.util.Map;
    +
    +import org.apache.carbondata.common.annotations.InterfaceAudience;
    +import org.apache.carbondata.core.datastore.block.SegmentProperties;
    +import org.apache.carbondata.core.datastore.impl.FileFactory;
    +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;
    +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
    +import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
    +import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
    +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
    +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
    +import org.apache.carbondata.core.reader.CarbonIndexFileReader;
    +import org.apache.carbondata.core.scan.filter.FilterUtil;
    +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter;
    +import org.apache.carbondata.core.scan.filter.executer.ImplicitColumnFilterExecutor;
    +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
    +import org.apache.carbondata.core.util.CarbonMetadataUtil;
    +import org.apache.carbondata.core.util.path.CarbonTablePath;
    +import org.apache.carbondata.format.BlockIndex;
    +
    +@InterfaceAudience.Internal
    +public class StreamDataMap {
    +
    +  private CarbonTable carbonTable;
    +
    +  private AbsoluteTableIdentifier identifier;
    +
    +  private FilterExecuter filterExecuter;
    +
    +  public StreamDataMap(CarbonTable carbonTable) {
    +    this.carbonTable = carbonTable;
    +    this.identifier = carbonTable.getAbsoluteTableIdentifier();
    +  }
    +
    +  public void init(FilterResolverIntf filterExp) {
    +    if (filterExp != null) {
    +
    +      List<CarbonColumn> minMaxCacheColumns = new ArrayList<>();
    +      for (CarbonDimension dimension : carbonTable.getDimensions()) {
    +        if (!dimension.isComplex()) {
    +          minMaxCacheColumns.add(dimension);
    +        }
    +      }
    +      minMaxCacheColumns.addAll(carbonTable.getMeasures());
    +
    +      List<ColumnSchema> listOfColumns =
    +          carbonTable.getTableInfo().getFactTable().getListOfColumns();
    +      int[] columnCardinality = new int[listOfColumns.size()];
    +      for (int index = 0; index < columnCardinality.length; index++) {
    +        columnCardinality[index] = Integer.MAX_VALUE;
    +      }
    +
    +      SegmentProperties segmentProperties =
    +          new SegmentProperties(listOfColumns, columnCardinality);
    +
    +      filterExecuter = FilterUtil.getFilterExecuterTree(
    +          filterExp, segmentProperties, null, minMaxCacheColumns);
    +    }
    +  }
    +
    +  public List<StreamFile> prune(List<Segment> segments) throws IOException {
    +    if (filterExecuter == null) {
    +      return listAllStreamFiles(segments, false);
    +    } else {
    +      List<StreamFile> streamFileList = new ArrayList<>();
    +      for (StreamFile streamFile : listAllStreamFiles(segments, true)) {
    +        if (isScanRequire(streamFile)) {
    +          streamFileList.add(streamFile);
    +          streamFile.setMinMaxIndex(null);
    +        }
    +      }
    +      return streamFileList;
    +    }
    +  }
    +
    +  private boolean isScanRequire(StreamFile streamFile) {
    +    // backward compatibility, old stream file without min/max index
    +    if (streamFile.getMinMaxIndex() == null) {
    +      return true;
    +    }
    +
    +    byte[][] maxValue = streamFile.getMinMaxIndex().getMaxValues();
    +    byte[][] minValue = streamFile.getMinMaxIndex().getMinValues();
    +    BitSet bitSet;
    +    if (filterExecuter instanceof ImplicitColumnFilterExecutor) {
    +      String filePath = streamFile.getFilePath();
    +      String uniqueBlockPath = filePath.substring(filePath.lastIndexOf("/Part") + 1);
    +      bitSet = ((ImplicitColumnFilterExecutor) filterExecuter)
    +          .isFilterValuesPresentInBlockOrBlocklet(maxValue, minValue, uniqueBlockPath);
    +    } else {
    +      bitSet = filterExecuter.isScanRequired(maxValue, minValue);
    +    }
    --- End diff --
   
    No need to check for if (filterExecuter instanceof ImplicitColumnFilterExecutor).
    You can directly call:
    bitSet = filterExecuter.isScanRequired(maxValue, minValue);
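
    Applied to the method above, the simplification would look roughly like this (names taken from the diff; the final return is assumed, since the quoted diff cuts off before it):

    private boolean isScanRequire(StreamFile streamFile) {
      // Backward compatibility: old stream files carry no min/max index.
      if (streamFile.getMinMaxIndex() == null) {
        return true;
      }
      byte[][] maxValue = streamFile.getMinMaxIndex().getMaxValues();
      byte[][] minValue = streamFile.getMinMaxIndex().getMinValues();
      // The generic FilterExecuter contract also covers the implicit-column
      // case, so no instanceof branch is needed.
      return !filterExecuter.isScanRequired(maxValue, minValue).isEmpty();
    }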


---

[GitHub] carbondata pull request #2644: [CARBONDATA-2853] Implement file-level min/ma...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2644#discussion_r214316607
 
    --- Diff: streaming/src/main/java/org/apache/carbondata/streaming/CarbonStreamRecordWriter.java ---
    @@ -212,9 +271,13 @@ private void initializeAtFirstRow() throws IOException, InterruptedException {
                 byte[] col = (byte[]) columnValue;
                 output.writeShort(col.length);
                 output.writeBytes(col);
    +            dimensionStatsCollectors[dimCount].update(col);
               } else {
                 output.writeInt((int) columnValue);
    +            dimensionStatsCollectors[dimCount].update(ByteUtil.toBytes((int) columnValue));
    --- End diff --
   
    For the min/max comparison you are converting from int to byte array for every row, which can impact write performance. Instead, you can typecast to int and do the comparison on primitives. After all the data is loaded, you can convert the final min/max values to byte arrays based on the datatype, so only one conversion happens per file.
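
    A self-contained sketch of that suggestion: keep the running min/max as primitive ints while rows stream in, and convert to bytes only once when the file is finalized (class and method names are illustrative, not CarbonData APIs):

    import java.nio.ByteBuffer;

    class IntMinMaxCollector {
      private int min = Integer.MAX_VALUE;
      private int max = Integer.MIN_VALUE;

      // Per row: two primitive comparisons, no byte[] allocation.
      void update(int value) {
        if (value < min) min = value;
        if (value > max) max = value;
      }

      // Once per file: a single conversion of the final values to bytes.
      byte[] minBytes() { return ByteBuffer.allocate(4).putInt(min).array(); }
      byte[] maxBytes() { return ByteBuffer.allocate(4).putInt(max).array(); }
    }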


---

[GitHub] carbondata pull request #2644: [CARBONDATA-2853] Implement file-level min/ma...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2644#discussion_r214310465
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java ---
    @@ -96,14 +96,35 @@ private static FileFooter3 getFileFooter3(List<BlockletInfo3> infoList,
         return footer;
       }
     
    -  public static BlockletIndex getBlockletIndex(
    -      org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex info) {
    +  public static org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex
    +      convertExternalMinMaxIndex(BlockletMinMaxIndex minMaxIndex) {
    --- End diff --
   
    Please add a method comment to explain the meaning of convertExternalMinMaxIndex.
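
    Based on the signature in the diff (the parameter is the Thrift-generated format.BlockletMinMaxIndex; the return type is the core metadata one), the requested comment could read roughly:

    /**
     * Converts a min/max index deserialized from the external (Thrift) file
     * format into the internal core metadata representation used for pruning.
     */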


---

[GitHub] carbondata pull request #2644: [CARBONDATA-2853] Implement file-level min/ma...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2644#discussion_r214313170
 
    --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/StreamHandoffRDD.scala ---
    @@ -205,8 +205,9 @@ class StreamHandoffRDD[K, V](
         segmentList.add(Segment.toSegment(handOffSegmentId, null))
         val splits = inputFormat.getSplitsOfStreaming(
           job,
    -      carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable.getAbsoluteTableIdentifier,
    -      segmentList
    +      segmentList,
    +      carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable,
    +      null
    --- End diff --
   
    Once you add the overloaded method as explained in the above comment, you can call the 3-argument method from here.


---

[GitHub] carbondata issue #2644: [CARBONDATA-2853] Implement file-level min/max index...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on the issue:

    https://github.com/apache/carbondata/pull/2644
 
    @QiangCai ...In general, I see that you have put empty lines in many places in the code. Please remove those empty lines everywhere and add some code comments for better understanding.


---

[GitHub] carbondata issue #2644: [CARBONDATA-2853] Implement file-level min/max index...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2644
 
    Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/173/



---

[GitHub] carbondata issue #2644: [CARBONDATA-2853] Implement file-level min/max index...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2644
 
    Build Failed  with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8244/



---

[GitHub] carbondata issue #2644: [CARBONDATA-2853] Implement file-level min/max index...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2644
 
    Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8255/



---

[GitHub] carbondata issue #2644: [CARBONDATA-2853] Implement file-level min/max index...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2644
 
    Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/184/



---

[GitHub] carbondata issue #2644: [CARBONDATA-2853] Implement file-level min/max index...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user QiangCai commented on the issue:

    https://github.com/apache/carbondata/pull/2644
 
    @ravipesala
    The blocklet-level min/max will be added in another PR.


---

[GitHub] carbondata issue #2644: [CARBONDATA-2853] Implement min/max index for stream...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2644
 
    Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/8284/



---

[GitHub] carbondata issue #2644: [CARBONDATA-2853] Implement min/max index for stream...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2644
 
    Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/213/



---

[GitHub] carbondata pull request #2644: [CARBONDATA-2853] Implement min/max index for...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2644#discussion_r214877247
 
    --- Diff: streaming/src/main/java/org/apache/carbondata/streaming/CarbonStreamRecordWriter.java ---
    @@ -212,9 +213,13 @@ private void initializeAtFirstRow() throws IOException, InterruptedException {
                 byte[] col = (byte[]) columnValue;
                 output.writeShort(col.length);
                 output.writeBytes(col);
    +            output.dimStatsCollectors[dimCount].update(col);
               } else {
                 output.writeInt((int) columnValue);
    +            output.dimStatsCollectors[dimCount].update(ByteUtil.toBytes((int) columnValue));
    --- End diff --
   
    I think the dictionary case never comes for streaming. Anyway, please use `output.dimStatsCollectors[dimCount].update((int) columnValue)` instead of converting to binary.


---

[GitHub] carbondata pull request #2644: [CARBONDATA-2853] Implement min/max index for...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2644#discussion_r214877452
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/StreamDataMap.java ---
    @@ -0,0 +1,148 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datamap;
    +
    +import java.io.File;
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.BitSet;
    +import java.util.List;
    +import java.util.Map;
    +
    +import org.apache.carbondata.common.annotations.InterfaceAudience;
    +import org.apache.carbondata.core.datastore.block.SegmentProperties;
    +import org.apache.carbondata.core.datastore.impl.FileFactory;
    +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;
    +import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
    +import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
    +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
    +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
    +import org.apache.carbondata.core.reader.CarbonIndexFileReader;
    +import org.apache.carbondata.core.scan.filter.FilterUtil;
    +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter;
    +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
    +import org.apache.carbondata.core.util.CarbonMetadataUtil;
    +import org.apache.carbondata.core.util.path.CarbonTablePath;
    +import org.apache.carbondata.format.BlockIndex;
    +
    +@InterfaceAudience.Internal
    +public class StreamDataMap {
    --- End diff --
   
    I don't think this is in line with the DataMap interfaces. Either implement the DataMap interface or remove "DataMap" from the class name.


---