Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1359#discussion_r143711222 --- Diff: examples/spark2/src/main/scala/org/apache/carbondata/examples/MinMaxDataMapFactory.java --- @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.examples; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.core.cache.Cache; +import org.apache.carbondata.core.cache.CacheProvider; +import org.apache.carbondata.core.cache.CacheType; +import org.apache.carbondata.core.datamap.DataMapDistributable; +import org.apache.carbondata.core.datamap.DataMapMeta; +import org.apache.carbondata.core.datamap.TableDataMap; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datamap.dev.DataMapFactory; +import org.apache.carbondata.core.datamap.dev.DataMapWriter; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.events.ChangeEvent; +import 
org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier; +import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap; +import org.apache.carbondata.core.indexstore.schema.FilterType; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; + + +/** + * Min Max DataMap Factory + */ +public class MinMaxDataMapFactory implements DataMapFactory { + + private AbsoluteTableIdentifier identifier; + + // segmentId -> list of index file + private Map<String, List<TableBlockIndexUniqueIdentifier>> segmentMap = new HashMap<>(); + + @Override + public void init(AbsoluteTableIdentifier identifier, String dataMapName) { + this.identifier = identifier; + } + + /** + * createWriter will return the MinMaxDataWriter. + * @param segmentId + * @return + */ + @Override + public DataMapWriter createWriter(String segmentId) { + return new MinMaxDataWriter(); + } + + /** + * getDataMaps Factory method Initializes the Min Max Data Map and returns. + * @param segmentId + * @return + * @throws IOException + */ + @Override + public List<DataMap> getDataMaps(String segmentId) throws IOException { + List<TableBlockIndexUniqueIdentifier> tableBlockIndexUniqueIdentifiers = + segmentMap.get(segmentId); + List<DataMap> dataMapList = new ArrayList<>(); + if (tableBlockIndexUniqueIdentifiers == null) { + tableBlockIndexUniqueIdentifiers = new ArrayList<>(); + CarbonFile[] listFiles = getCarbonIndexFiles(segmentId); + for (int i = 0; i < listFiles.length; i++) { + tableBlockIndexUniqueIdentifiers.add( + new TableBlockIndexUniqueIdentifier(identifier, segmentId, listFiles[i].getName())); + } + } + // Form a dataMap of Type MinMaxDataMap. + MinMaxDataMap dataMap = new MinMaxDataMap(); + try { + dataMap.init(tableBlockIndexUniqueIdentifiers.get(0).getFilePath()); + } catch (MemoryException ex) { + + } + dataMapList.add(dataMap); + return dataMapList; + } + + /** + * Routine to retrieve the carbonIndex. 
+ * @param segmentId + * @return + */ + private CarbonFile[] getCarbonIndexFiles(String segmentId) { --- End diff -- Why is this method required? --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1359#discussion_r143714171 --- Diff: examples/spark2/src/main/scala/org/apache/carbondata/examples/MinMaxBlockletComparator.java --- @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.examples; + +import java.nio.ByteBuffer; +import java.util.Comparator; + +import org.apache.carbondata.core.util.ByteUtil; + + +/** + * Data map comparator + */ +public class MinMaxBlockletComparator implements Comparator<byte[]> { --- End diff -- Removed --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1359#discussion_r143714764 --- Diff: examples/spark2/src/main/scala/org/apache/carbondata/examples/MinMaxDataMap.java --- @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.examples; + +import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.List; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.cache.Cacheable; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datastore.IndexKey; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import 
org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.indexstore.Blocklet; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.carbondata.core.util.CarbonUtil; + +import com.google.gson.Gson; + +/** + * Datamap implementation for min max blocklet. + */ +public class MinMaxDataMap implements DataMap, Cacheable { + + private static final LogService LOGGER = + LogServiceFactory.getLogService(MinMaxDataMap.class.getName()); + + public static final String NAME = "clustered.minmax.btree.blocklet"; + + private String filePath; + + private MinMaxIndexBlockDetails[] readMinMaxDataMap; + + @Override public void init(String filePath) throws MemoryException, IOException { + this.filePath = filePath; + CarbonFile[] listFiles = getCarbonIndexFiles(filePath, "0"); + for (int i = 0; i < listFiles.length; i++) { + readMinMaxDataMap = readJson(listFiles[i].getPath()); + } + } + + private CarbonFile[] getCarbonIndexFiles(String filePath, String segmentId) { --- End diff -- Done. --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1359#discussion_r143714831 --- Diff: examples/spark2/src/main/scala/org/apache/carbondata/examples/MinMaxDataMap.java --- @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.examples; + +import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.List; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.cache.Cacheable; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datastore.IndexKey; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import 
org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.indexstore.Blocklet; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.carbondata.core.util.CarbonUtil; + +import com.google.gson.Gson; + +/** + * Datamap implementation for min max blocklet. + */ +public class MinMaxDataMap implements DataMap, Cacheable { --- End diff -- Done --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1359#discussion_r143721417 --- Diff: examples/spark2/src/main/scala/org/apache/carbondata/examples/MinMaxDataMapFactory.java --- @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.examples; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.core.cache.Cache; +import org.apache.carbondata.core.cache.CacheProvider; +import org.apache.carbondata.core.cache.CacheType; +import org.apache.carbondata.core.datamap.DataMapDistributable; +import org.apache.carbondata.core.datamap.DataMapMeta; +import org.apache.carbondata.core.datamap.TableDataMap; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datamap.dev.DataMapFactory; +import org.apache.carbondata.core.datamap.dev.DataMapWriter; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.events.ChangeEvent; +import 
org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier; +import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap; +import org.apache.carbondata.core.indexstore.schema.FilterType; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; + + +/** + * Min Max DataMap Factory + */ +public class MinMaxDataMapFactory implements DataMapFactory { + + private AbsoluteTableIdentifier identifier; + + // segmentId -> list of index file + private Map<String, List<TableBlockIndexUniqueIdentifier>> segmentMap = new HashMap<>(); + + @Override + public void init(AbsoluteTableIdentifier identifier, String dataMapName) { + this.identifier = identifier; + } + + /** + * createWriter will return the MinMaxDataWriter. + * @param segmentId + * @return + */ + @Override + public DataMapWriter createWriter(String segmentId) { + return new MinMaxDataWriter(); + } + + /** + * getDataMaps Factory method Initializes the Min Max Data Map and returns. + * @param segmentId + * @return + * @throws IOException + */ + @Override + public List<DataMap> getDataMaps(String segmentId) throws IOException { + List<TableBlockIndexUniqueIdentifier> tableBlockIndexUniqueIdentifiers = + segmentMap.get(segmentId); + List<DataMap> dataMapList = new ArrayList<>(); + if (tableBlockIndexUniqueIdentifiers == null) { + tableBlockIndexUniqueIdentifiers = new ArrayList<>(); + CarbonFile[] listFiles = getCarbonIndexFiles(segmentId); + for (int i = 0; i < listFiles.length; i++) { + tableBlockIndexUniqueIdentifiers.add( + new TableBlockIndexUniqueIdentifier(identifier, segmentId, listFiles[i].getName())); + } + } + // Form a dataMap of Type MinMaxDataMap. + MinMaxDataMap dataMap = new MinMaxDataMap(); + try { + dataMap.init(tableBlockIndexUniqueIdentifiers.get(0).getFilePath()); + } catch (MemoryException ex) { + + } + dataMapList.add(dataMap); + return dataMapList; + } + + /** + * Routine to retrieve the carbonIndex. 
+ * @param segmentId + * @return + */ + private CarbonFile[] getCarbonIndexFiles(String segmentId) { --- End diff -- removed --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1359 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/1041/ --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1359#discussion_r143741467 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java --- @@ -219,4 +225,27 @@ public DataMapMeta getMeta() { // TODO: pass SORT_COLUMNS into this class return null; } + + @Override public SegmentProperties getSegmentProperties(String segmentId) throws IOException { + SegmentProperties segmentProperties = null; --- End diff -- Done. --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1359 Build Success with Spark 1.6, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/291/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1359 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/415/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1359 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/1045/ --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on the issue:
https://github.com/apache/carbondata/pull/1359 Retest this please. --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1359 Build Success with Spark 1.6, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/292/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1359 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/416/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1359 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/1046/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1359#discussion_r144206409 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapWriter.java --- @@ -45,14 +45,13 @@ * @param blockletId sequence number of blocklet in the block */ void onBlockletEnd(int blockletId); - /** * Add the column pages row to the datamap, order of pages is same as `indexColumns` in * DataMapMeta returned in DataMapFactory. * * Implementation should copy the content of `pages` as needed, because `pages` memory * may be freed after this method returns, if using unsafe column page. */ - void onPageAdded(int blockletId, int pageId, ColumnPage[] pages); + void onPageAdded(int blockletId, int pageId, ColumnPage[] pages, String directoryPath); --- End diff -- Instead of passing the directoryPath on every call, it would be better to add an init method and pass the segment directory path to it. --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1359#discussion_r144249578 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapWriter.java --- @@ -45,14 +45,13 @@ * @param blockletId sequence number of blocklet in the block */ void onBlockletEnd(int blockletId); - /** * Add the column pages row to the datamap, order of pages is same as `indexColumns` in * DataMapMeta returned in DataMapFactory. * * Implementation should copy the content of `pages` as needed, because `pages` memory * may be freed after this method returns, if using unsafe column page. */ - void onPageAdded(int blockletId, int pageId, ColumnPage[] pages); + void onPageAdded(int blockletId, int pageId, ColumnPage[] pages, String directoryPath); --- End diff -- This is not a directory path; it is the full path to the block, so it is constant for a block. I removed it from OnPageAdded and passed it as an extra parameter to OnBlockStart, as this blockPath has to be initialized on every block start. --- |
In reply to this post by qiuchenjian-2
Github user sounakr commented on the issue:
https://github.com/apache/carbondata/pull/1359 Retest this please. --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1359 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/436/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1359 Build Success with Spark 1.6, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/311/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1359 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/1064/ --- |
Free forum by Nabble | Edit this page |