GitHub user dhatchayani opened a pull request:
https://github.com/apache/carbondata/pull/2134 [CARBONDATA-2310] Refactored code to improve Distributable interface Refactored code to improve Distributable interface - [ ] Any interfaces changed? - [ ] Any backward compatibility impacted? - [ ] Document update required? - [x] Testing done Manual Testing - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. You can merge this pull request into a Git repository by running: $ git pull https://github.com/dhatchayani/carbondata distributable Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/2134.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #2134 ---- commit dfad93daa4c9cf549f5a9d0d4362024354964454 Author: dhatchayani <dhatcha.official@...> Date: 2018-04-03T05:49:43Z [CARBONDATA-2310] Refactored code to improve Distributable interface ---- --- |
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178720946 --- Diff: core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java --- @@ -168,6 +168,10 @@ public ReverseDictionaryCache(CarbonLRUCache carbonLRUCache) { CacheType.REVERSE_DICTIONARY)); } + @Override public void put(DictionaryColumnUniqueIdentifier key, Dictionary value) { + + } + --- End diff -- override the method in abstract class and in the implementation throw UnsupportedException for both reverse and forward cache --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178721496 --- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/api/DataMapJob.java --- @@ -19,15 +19,21 @@ import java.io.Serializable; import java.util.List; +import org.apache.carbondata.core.datamap.dev.DataMap; import org.apache.carbondata.core.indexstore.ExtendedBlocklet; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; + /** * Distributable datamap job to execute the #DistributableDataMapFormat in cluster. it prunes the * datamaps distributably and returns the final blocklet list */ public interface DataMapJob extends Serializable { + List<DataMap> execute(FileInputFormat<Void, DataMap> dataMapFormat, CarbonTable carbonTable); + --- End diff -- Change the argument order, first should be carbonTable --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178721874 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/BlockIndexStore.java --- @@ -229,6 +229,10 @@ private String getLruCacheKey(AbsoluteTableIdentifier absoluteTableIdentifier, .remove(getLruCacheKey(tableBlockUniqueIdentifier.getAbsoluteTableIdentifier(), blockInfo)); } + @Override public void put(TableBlockUniqueIdentifier key, AbstractIndex value) { + + } + --- End diff -- throw unsupportedException --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178721900 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java --- @@ -140,6 +140,10 @@ public SegmentTaskIndexWrapper get(TableSegmentUniqueIdentifier tableSegmentUniq lruCache.remove(tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier()); } + @Override public void put(TableSegmentUniqueIdentifier key, SegmentTaskIndexWrapper value) { + + } + --- End diff -- throw unsupportedException --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178722797 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java --- @@ -206,6 +206,31 @@ public void invalidate(TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentif lruCache.remove(tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier()); } + @Override public void put(TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier, + BlockletDataMap blockletDataMap) throws IOException, MemoryException { + String uniqueTableSegmentIdentifier = + tableBlockIndexUniqueIdentifier.getUniqueTableSegmentIdentifier(); + Object lock = segmentLockMap.get(uniqueTableSegmentIdentifier); + if (lock == null) { + lock = addAndGetSegmentLock(uniqueTableSegmentIdentifier); + } + if (null == getIfPresent(tableBlockIndexUniqueIdentifier)) { --- End diff -- add a comment why the getIfPresent check is required - Reason: As the dataMap will use unsafe memory, it is not recommended to overwrite an existing entry, because in that case clearing the unsafe memory needs to be taken care of. If a datamap entry in the cache needs to be overwritten, then use the invalidate interface first and then use the put interface --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178723089 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java --- @@ -97,12 +119,17 @@ public void addIndexRowToUnsafe(DataMapRow indexRow) throws MemoryException { int rowSize = indexRow.getTotalSizeInBytes(); // Check whether allocated memory is sufficient or not. ensureSize(rowSize); - int pointer = runningLength; + if (addRowToUnsafe) { + int pointer = runningLength; - for (int i = 0; i < schema.length; i++) { - addToUnsafe(schema[i], indexRow, i); + for (int i = 0; i < schema.length; i++) { + addToUnsafe(schema[i], indexRow, i); + } + pointers[rowCount++] = pointer; + } else { + // add dataMap rows to in memory + addDataMapRow(indexRow); --- End diff -- Rename the method name to addDataMapRowToMemory --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178723320 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java --- @@ -177,10 +204,45 @@ public UnsafeDataMapRow getUnsafeRow(int index) { return new UnsafeDataMapRow(schema, memoryBlock, pointers[index]); } - public void finishWriting() throws MemoryException { + /** + * Add the index row to dataMapRows, basically to in memory. + * + * @param indexRow + * @return + */ + private void addDataMapRow(DataMapRow indexRow) throws MemoryException { + dataMapRows.add(indexRow); + } + + /** + * This method will write all the dataMapRows to unsafe + * + * @throws MemoryException + * @throws IOException + */ + private void adddataMapRowToUnsafe() throws MemoryException, IOException { --- End diff -- Correct the typo in method name and rename to addDataMapRowsToUnsafe --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178723434 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java --- @@ -77,6 +78,11 @@ */ public class BlockletDataMap implements DataMap, Cacheable { + /** + * default serial version ID. + */ + private static final long serialVersionUID = 1L; --- End diff -- generate proper serial version UID --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178723563 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java --- @@ -124,10 +130,12 @@ private UnsafeMemoryDMStore unsafeMemorySummaryDMStore; - private SegmentProperties segmentProperties; + private transient SegmentProperties segmentProperties; --- End diff -- Add reason why it is made transient Reason: As it is a heavy object it is not recommended to serialize this object --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178723724 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java --- @@ -30,7 +30,12 @@ */ public class UnsafeDataMapRow extends DataMapRow { - private MemoryBlock block; + /** + * default serial version ID. + */ + private static final long serialVersionUID = 1L; + + private transient MemoryBlock block; --- End diff -- Add reason for making it transient Reason: As it is an unsafe memory block it is not recommended to serialize --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2134 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/4274/ --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178724054 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java --- @@ -32,9 +37,14 @@ /** * Store the data map row @{@link DataMapRow} data to unsafe. */ -public class UnsafeMemoryDMStore { +public class UnsafeMemoryDMStore implements Serializable { + + /** + * default serial version ID. + */ + private static final long serialVersionUID = 1L; - private MemoryBlock memoryBlock; + private transient MemoryBlock memoryBlock; --- End diff -- Add reason for making it transient --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178724390 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapDistributable.java --- @@ -38,4 +38,5 @@ public BlockletDataMapDistributable(String indexFilePath) { public String getFilePath() { return filePath; } + --- End diff -- remove empty line --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2134#discussion_r178724959 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java --- @@ -156,15 +167,18 @@ private ExtendedBlocklet getExtendedBlocklet(List<TableBlockIndexUniqueIdentifie @Override public List<DataMapDistributable> toDistributable(Segment segment) { List<DataMapDistributable> distributables = new ArrayList<>(); + Map<String, String> indexFiles = null; try { CarbonFile[] carbonIndexFiles; if (segment.getSegmentFileName() == null) { carbonIndexFiles = SegmentIndexFileStore.getCarbonIndexFiles( CarbonTablePath.getSegmentPath(identifier.getTablePath(), segment.getSegmentNo())); } else { + // TODO: Buggy code, this code will not work as we need to list only the + // physically existing files --- End diff -- Remove this TODO and handle partition case --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2134 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/4771/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2134 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/3543/ --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on the issue:
https://github.com/apache/carbondata/pull/2134 Add a few UTs to test this code --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2134 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/4276/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2134 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/3545/ --- |
Free forum by Nabble | Edit this page |