Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2565

Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/7590/

---
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2565

Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/6345/

---
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2565#discussion_r206008805

--- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java ---

@@ -492,6 +493,26 @@ protected Expression getFilterPredicates(Configuration configuration) {
     return prunedBlocklets;
   }

+  private List<ExtendedBlocklet> intersectFilteredBlocklets(CarbonTable carbonTable,
+      List<ExtendedBlocklet> previousDataMapPrunedBlocklets,
+      List<ExtendedBlocklet> otherDataMapPrunedBlocklets) {
+    List<ExtendedBlocklet> prunedBlocklets = null;
+    if (BlockletDataMapUtil
+        .isCacheLevelBlock(carbonTable, BlockletDataMapFactory.CACHE_LEVEL_BLOCKLET)) {
+      prunedBlocklets = new ArrayList<>(otherDataMapPrunedBlocklets);
+      // add blocklets from previous dataMap that are not filtered by other dataMaps
+      for (ExtendedBlocklet previousBlocklet : previousDataMapPrunedBlocklets) {
+        if (!otherDataMapPrunedBlocklets.contains(previousBlocklet)) {
+          prunedBlocklets.add(previousBlocklet);

--- End diff --

It is supposed to be the same as the blocklet level. Why should extra non-pruned blocklets be added to the list? Is there any use case for this check?

---
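For context, the diff above merges the two pruned lists union-style when the cache level is BLOCKLET. A strict intersection, which is what the reviewer appears to be asking for, would look roughly like the sketch below. This is illustrative only, not the PR's code, and it assumes ExtendedBlocklet implements equals()/hashCode() so that the same blocklet found by two DataMaps compares equal.

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Illustrative sketch, not CarbonData source: keep only blocklets that
    // survived pruning by both DataMaps.
    private static List<ExtendedBlocklet> intersect(List<ExtendedBlocklet> previous,
        List<ExtendedBlocklet> other) {
      // HashSet gives O(1) membership checks instead of List.contains' O(n)
      Set<ExtendedBlocklet> otherSet = new HashSet<>(other);
      List<ExtendedBlocklet> result = new ArrayList<>();
      for (ExtendedBlocklet blocklet : previous) {
        if (otherSet.contains(blocklet)) {
          result.add(blocklet);
        }
      }
      return result;
    }

Building a HashSet first avoids the quadratic cost of repeated List.contains calls when the pruned lists are large.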
Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2565#discussion_r206040947

--- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java ---

@@ -492,6 +493,26 @@ protected Expression getFilterPredicates(Configuration configuration) {
     return prunedBlocklets;
   }

+  private List<ExtendedBlocklet> intersectFilteredBlocklets(CarbonTable carbonTable,
+      List<ExtendedBlocklet> previousDataMapPrunedBlocklets,
+      List<ExtendedBlocklet> otherDataMapPrunedBlocklets) {
+    List<ExtendedBlocklet> prunedBlocklets = null;
+    if (BlockletDataMapUtil
+        .isCacheLevelBlock(carbonTable, BlockletDataMapFactory.CACHE_LEVEL_BLOCKLET)) {
+      prunedBlocklets = new ArrayList<>(otherDataMapPrunedBlocklets);
+      // add blocklets from previous dataMap that are not filtered by other dataMaps
+      for (ExtendedBlocklet previousBlocklet : previousDataMapPrunedBlocklets) {
+        if (!otherDataMapPrunedBlocklets.contains(previousBlocklet)) {
+          prunedBlocklets.add(previousBlocklet);

--- End diff --

Fixed as we discussed.

---
Github user xuchuanyin commented on the issue:
https://github.com/apache/carbondata/pull/2565

As for the incorrect EXPLAIN output, I have raised JIRA issue 2797 to track it.

---
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2565

Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/7602/

---
Github user chenliang613 commented on the issue:
https://github.com/apache/carbondata/pull/2565

retest this please

---
Github user chenliang613 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2565#discussion_r206178641

--- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java ---

@@ -71,7 +71,7 @@
   /**
    * variable for cache level BLOCKLET
    */
-  private static final String CACHE_LEVEL_BLOCKLET = "BLOCKLET";
+  public static final String CACHE_LEVEL_BLOCKLET = "BLOCKLET";

--- End diff --

Why was this changed to public?

---
Github user chenliang613 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2565#discussion_r206178902

--- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java ---

@@ -318,13 +318,22 @@ private DataMapRowImpl loadBlockMetaInfo(CarbonRowSchema[] taskSummarySchema,
           blockMinValues, blockMaxValues);
       blockletCountInEachBlock.add(totalBlockletsInOneBlock);
     }
-    byte[] blockletCount = ArrayUtils
-        .toPrimitive(blockletCountInEachBlock.toArray(new Byte[blockletCountInEachBlock.size()]));
+    byte[] blockletCount = convertRowCountFromShortToByteArray(blockletCountInEachBlock);
     // blocklet count index is the last index
     summaryRow.setByteArray(blockletCount, taskSummarySchema.length - 1);
     return summaryRow;
   }

+  private byte[] convertRowCountFromShortToByteArray(List<Short> blockletCountInEachBlock) {

--- End diff --

Why is this conversion needed?

---
Github user chenliang613 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2565#discussion_r206181478

--- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala ---

@@ -371,7 +371,7 @@ class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll {
       """
         | CREATE TABLE datamap_test_table(id INT, name STRING, city STRING, age INT)
         | STORED BY 'carbondata'
-        | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='GLOBAL_SORT')
+        | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='GLOBAL_SORT', 'CACHE_LEVEL'='BLOCKLET')

--- End diff --

Why does "CACHE_LEVEL" need to be changed to "BLOCKLET"?

---
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2565

Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/7621/

---
Github user brijoobopanna commented on the issue:
https://github.com/apache/carbondata/pull/2565

retest this please

---
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2565

Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/7640/

---
Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2565#discussion_r206367396

--- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java ---

@@ -71,7 +71,7 @@
   /**
    * variable for cache level BLOCKLET
    */
-  private static final String CACHE_LEVEL_BLOCKLET = "BLOCKLET";
+  public static final String CACHE_LEVEL_BLOCKLET = "BLOCKLET";

--- End diff --

Because this member needs to be accessed from outside this class. Currently, `CarbonInputFormat` uses this variable to determine the current cache level.

---
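For illustration, the kind of call site that forces the wider visibility, mirroring the check in the CarbonInputFormat diff earlier in this thread (imports and the surrounding method are elided; this is a sketch, not the exact PR code):

    // Compiles only if CACHE_LEVEL_BLOCKLET is public, since CarbonInputFormat
    // lives in a different package than BlockletDataMapFactory.
    boolean cacheLevelCheck = BlockletDataMapUtil
        .isCacheLevelBlock(carbonTable, BlockletDataMapFactory.CACHE_LEVEL_BLOCKLET);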
Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2565#discussion_r206367642

--- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java ---

@@ -318,13 +318,22 @@ private DataMapRowImpl loadBlockMetaInfo(CarbonRowSchema[] taskSummarySchema,
           blockMinValues, blockMaxValues);
       blockletCountInEachBlock.add(totalBlockletsInOneBlock);
     }
-    byte[] blockletCount = ArrayUtils
-        .toPrimitive(blockletCountInEachBlock.toArray(new Byte[blockletCountInEachBlock.size()]));
+    byte[] blockletCount = convertRowCountFromShortToByteArray(blockletCountInEachBlock);
     // blocklet count index is the last index
     summaryRow.setByteArray(blockletCount, taskSummarySchema.length - 1);
     return summaryRow;
   }

+  private byte[] convertRowCountFromShortToByteArray(List<Short> blockletCountInEachBlock) {

--- End diff --

Because we are using the off-heap store, which requires the data to be stored as bytes.

---
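A minimal sketch of what such a conversion can look like, assuming each count is packed as two big-endian bytes; the actual CarbonData implementation and byte layout may differ:

    import java.nio.ByteBuffer;
    import java.util.List;

    // Illustrative sketch: pack each per-block blocklet count (a short) into
    // a byte array so the summary row can hold it via setByteArray and the
    // off-heap store can persist it as raw bytes.
    private static byte[] convertRowCountFromShortToByteArray(
        List<Short> blockletCountInEachBlock) {
      ByteBuffer buffer = ByteBuffer.allocate(blockletCountInEachBlock.size() * Short.BYTES);
      for (Short count : blockletCountInEachBlock) {
        buffer.putShort(count); // auto-unboxed; ByteBuffer defaults to big-endian
      }
      return buffer.array();
    }

Using a short per block (rather than the previous byte) raises the maximum representable blocklet count per block from 127 to 32767.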
Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2565#discussion_r206367929

--- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala ---

@@ -371,7 +371,7 @@ class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll {
      """
        | CREATE TABLE datamap_test_table(id INT, name STRING, city STRING, age INT)
        | STORED BY 'carbondata'
-       | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='GLOBAL_SORT')
+       | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='GLOBAL_SORT', 'CACHE_LEVEL'='BLOCKLET')

--- End diff --

By default the CACHE_LEVEL is BLOCK, which may affect the pruning info. Some test cases in this file assert on the content of the pruning info, so I changed the CACHE_LEVEL to BLOCKLET here so that I do not have to modify those assertions.

---
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2565

Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/6365/

---
Github user manishgupta88 commented on the issue:
https://github.com/apache/carbondata/pull/2565

LGTM

---
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2565

Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/6382/

---