GitHub user sounakr opened a pull request:
https://github.com/apache/carbondata/pull/1329 [WIP] Min Max DataMap Example Min Max DataMap Example You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata MinMaxDataMap Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1329.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1329 ---- commit c01d4cd6dfa172564eff2cf53cadab5cb3d2b46a Author: sounakr <[hidden email]> Date: 2017-09-06T04:45:25Z Min Max DataMap 1st Phase ---- --- |
Github user jackylk commented on the issue:
https://github.com/apache/carbondata/pull/1329 Great, thanks for working on this. My first comment is: can you move this example to the example module, since "letting developers extend datamap in their own library" is one of the goals of the datamap framework. We should verify the datamap framework against this goal. --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1329#discussion_r137212634 --- Diff: core/src/main/java/org/apache/carbondata/core/cache/CacheType.java --- @@ -61,6 +61,9 @@ public static final CacheType<TableSegmentUniqueIdentifier, SegmentTaskIndexWrapper> DRIVER_BLOCKLET_DATAMAP = new CacheType("driver_blocklet_datamap"); + public static final CacheType<TableSegmentUniqueIdentifier, SegmentTaskIndexWrapper> --- End diff -- Don't add cache type, no need to handle cache in example --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1329#discussion_r137212887 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/DataMapMinMax.java --- @@ -0,0 +1,454 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.indexstore.blockletindex; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Comparator; +import java.util.List; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.cache.Cacheable; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datastore.IndexKey; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.block.TableBlockInfo; +import org.apache.carbondata.core.indexstore.Blocklet; +import 
org.apache.carbondata.core.indexstore.BlockletDetailInfo; +import org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore; +import org.apache.carbondata.core.indexstore.row.DataMapRow; +import org.apache.carbondata.core.indexstore.row.DataMapRowImpl; +import org.apache.carbondata.core.indexstore.schema.DataMapSchema; +import org.apache.carbondata.core.keygenerator.KeyGenException; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; + +/** + * Datamap implementation for blocklet. + */ +public class DataMapMinMax implements DataMap, Cacheable { --- End diff -- No need to use Unsafe in example, don't confuse users with unsafe in example. Just make it as simple as possible --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1329#discussion_r137213067 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/DataMapMinMaxWriter.scala --- @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.spark.testsuite.datamap + +import org.apache.spark.sql.test.util.QueryTest +import org.apache.spark.sql.{DataFrame, SaveMode} +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.datamap.{DataMapStoreManager, TableDataMap} +import org.apache.carbondata.core.datamap.dev.DataMapWriter +import org.apache.carbondata.core.datastore.page.ColumnPage +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.datatype.DataType +import org.apache.carbondata.core.util.CarbonProperties + + +class DataMapMinMaxWriter extends QueryTest with BeforeAndAfterAll { --- End diff -- Append testcase to the file name --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1329#discussion_r137213430 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/MinMaxDataMap.java --- @@ -0,0 +1,193 @@ +package org.apache.carbondata.spark.testsuite.datamap; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.block.TableBlockInfo; +import org.apache.carbondata.core.indexstore.Blocklet; +import org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore; +import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap; +import org.apache.carbondata.core.indexstore.row.DataMapRow; +import org.apache.carbondata.core.indexstore.row.DataMapRowImpl; +import org.apache.carbondata.core.indexstore.schema.DataMapSchema; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; + +public class MinMaxDataMap implements DataMap { + + public static final String NAME = "clustered.btree.minmax"; + + private static int KEY_INDEX = 0; + + private static int MIN_VALUES_INDEX = 1; + + private 
static int MAX_VALUES_INDEX = 2; + + private UnsafeMemoryDMStore unsafeMemoryDMStore; + + private SegmentProperties segmentProperties; + + private int[] columnCardinality; + + + @Override public void init(String filePath) throws MemoryException, IOException { + long startTime = System.currentTimeMillis(); + DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter(); + List<DataFileFooter> indexInfo = fileFooterConverter.getIndexInfo(filePath); + for (DataFileFooter fileFooter : indexInfo) { + List<ColumnSchema> columnInTable = fileFooter.getColumnInTable(); + if (segmentProperties == null) { + columnCardinality = fileFooter.getSegmentInfo().getColumnCardinality(); + segmentProperties = new SegmentProperties(columnInTable, columnCardinality); + //createSchema(segmentProperties); + } + TableBlockInfo blockInfo = fileFooter.getBlockInfo().getTableBlockInfo(); + if (fileFooter.getBlockletList() == null || fileFooter.getBlockletList().size() == 0) { +// LOGGER +// .info("Reading carbondata file footer to get blocklet info " + blockInfo.getFilePath()); + fileFooter = CarbonUtil.readMetadatFile(blockInfo); + } + + loadToUnsafe(fileFooter, segmentProperties, blockInfo.getFilePath()); + } + if (unsafeMemoryDMStore != null) { + unsafeMemoryDMStore.finishWriting(); + } +// LOGGER.info("Time taken to load blocklet datamap from file : " + filePath + "is " + +// (System.currentTimeMillis() - startTime)); + + } + + @Override public List<Blocklet> prune(FilterResolverIntf filterExp) { + return null; + } + + @Override public void clear() { + + } + + public void updateMinMaxIndex(String filePath) throws IOException, MemoryException { --- End diff -- it shouldn't be read from index file. Use the Datamap writer to write example index file and read here --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1329 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/9/ --- |
In reply to this post by qiuchenjian-2
|
Free forum by Nabble | Edit this page |