GitHub user jackylk opened a pull request:
https://github.com/apache/carbondata/pull/1237

    Clean up CarbonFactDataHandlerColumnar

    Clean up CarbonFactDataHandlerColumnar and Writer to prepare for DataMap write logic.

You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/jackylk/incubator-carbondata isadded

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/carbondata/pull/1237.patch

To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message:

    This closes #1237

----
commit f07f15d5770b62965d46c96e3ecc084de36e15ce
Author: Jacky Li <[hidden email]>
Date:   2017-08-04T12:59:18Z

    remove unnecessary logic

commit bc0882ab2aeae804d1abd3b61d8125ebc9d2b15d
Author: Jacky Li <[hidden email]>
Date:   2017-08-04T18:40:52Z

    refactory

commit 9b2dfee73c07801dafe72ba118187895ccab3a88
Author: Jacky Li <[hidden email]>
Date:   2017-08-04T13:33:57Z

    clean up writer
----
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1237#discussion_r131462140

--- Diff: processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java ---
@@ -471,124 +450,57 @@ public void closeHandler() throws CarbonDataWriterException {
    */
   private void setWritingConfiguration() throws CarbonDataWriterException {
     // get blocklet size
-    this.blockletSize = Integer.parseInt(CarbonProperties.getInstance()
+    this.pageSizeThreshold = Integer.parseInt(CarbonProperties.getInstance()
         .getProperty(CarbonCommonConstants.BLOCKLET_SIZE,
             CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL));
     if (version == ColumnarFormatVersion.V3) {
-      this.blockletSize = Integer.parseInt(CarbonProperties.getInstance()
+      this.pageSizeThreshold = Integer.parseInt(CarbonProperties.getInstance()
          .getProperty(CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE,
              CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT));
     }
-    LOGGER.info("Number of rows per column blocklet " + blockletSize);
-    dataRows = new ArrayList<>(this.blockletSize);
-    int dimSet =
-        Integer.parseInt(CarbonCommonConstants.DIMENSION_SPLIT_VALUE_IN_COLUMNAR_DEFAULTVALUE);
-    // if atleast one dimension is present then initialize column splitter otherwise null
-    int noOfColStore = colGrpModel.getNoOfColumnStore();
-    int[] keyBlockSize = new int[noOfColStore + getExpandedComplexColsCount()];
-
+    LOGGER.info("Number of rows per column blocklet " + pageSizeThreshold);
+    dataRows = new ArrayList<>(this.pageSizeThreshold);
     if (model.getDimLens().length > 0) {
       //Using Variable length variable split generator
       //This will help in splitting mdkey to columns. variable split is required because all
       // columns which are part of
       //row store will be in single column store
       //e.g if {0,1,2,3,4,5} is dimension and {0,1,2) is row store dimension
       //than below splitter will return column as {0,1,2}{3}{4}{5}
-      this.columnarSplitter = model.getSegmentProperties().getFixedLengthKeySplitter();
-      System.arraycopy(columnarSplitter.getBlockKeySize(), 0, keyBlockSize, 0, noOfColStore);
+      ColumnarSplitter columnarSplitter = model.getSegmentProperties().getFixedLengthKeySplitter();
       this.keyBlockHolder =
-          new CarbonKeyBlockHolder[this.columnarSplitter.getBlockKeySize().length];
+          new CarbonKeyBlockHolder[columnarSplitter.getBlockKeySize().length];
     } else {
       this.keyBlockHolder = new CarbonKeyBlockHolder[0];
     }
     for (int i = 0; i < keyBlockHolder.length; i++) {
-      this.keyBlockHolder[i] = new CarbonKeyBlockHolder(blockletSize);
+      this.keyBlockHolder[i] = new CarbonKeyBlockHolder(pageSizeThreshold);
       this.keyBlockHolder[i].resetCounter();
     }
-    // agg type
-    List<Integer> otherMeasureIndexList =
-        new ArrayList<Integer>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
-    List<Integer> customMeasureIndexList =
-        new ArrayList<Integer>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
-    DataType[] type = model.getMeasureDataType();
-    for (int j = 0; j < type.length; j++) {
-      if (type[j] != DataType.BYTE && type[j] != DataType.DECIMAL) {
-        otherMeasureIndexList.add(j);
-      } else {
-        customMeasureIndexList.add(j);
-      }
-    }
-
-    int[] otherMeasureIndex = new int[otherMeasureIndexList.size()];
-    int[] customMeasureIndex = new int[customMeasureIndexList.size()];
-    for (int i = 0; i < otherMeasureIndex.length; i++) {
-      otherMeasureIndex[i] = otherMeasureIndexList.get(i);
-    }
-    for (int i = 0; i < customMeasureIndex.length; i++) {
-      customMeasureIndex[i] = customMeasureIndexList.get(i);
-    }
     setComplexMapSurrogateIndex(model.getDimensionCount());
-    int[] blockKeySize = getBlockKeySizeWithComplexTypes(new MultiDimKeyVarLengthEquiSplitGenerator(
-        CarbonUtil.getIncrementedCardinalityFullyFilled(model.getDimLens().clone()), (byte) dimSet)
-        .getBlockKeySize());
-    System.arraycopy(blockKeySize, noOfColStore, keyBlockSize, noOfColStore,
-        blockKeySize.length - noOfColStore);
-    this.dataWriter = getFactDataWriter(keyBlockSize);
+    this.dataWriter = getFactDataWriter();
     this.dataWriter.setIsNoDictionary(isNoDictionary);
     // initialize the channel;
     this.dataWriter.initializeWriter();
-    //initializeColGrpMinMax();
-  }
-
-  /**
-   * This method combines primitive dimensions with complex metadata columns
-   *
-   * @param primitiveBlockKeySize
-   * @return all dimensions cardinality including complex dimension metadata column
-   */
-  private int[] getBlockKeySizeWithComplexTypes(int[] primitiveBlockKeySize) {
-    int allColsCount = getExpandedComplexColsCount();
-    int[] blockKeySizeWithComplexTypes =
-        new int[this.colGrpModel.getNoOfColumnStore() + allColsCount];
-
-    List<Integer> blockKeySizeWithComplex =
-        new ArrayList<Integer>(blockKeySizeWithComplexTypes.length);
-    int dictDimensionCount = model.getDimensionCount();
-    for (int i = 0; i < dictDimensionCount; i++) {
-      GenericDataType complexDataType = model.getComplexIndexMap().get(i);
-      if (complexDataType != null) {
-        complexDataType.fillBlockKeySize(blockKeySizeWithComplex, primitiveBlockKeySize);
-      } else {
-        blockKeySizeWithComplex.add(primitiveBlockKeySize[i]);
-      }
-    }
-    for (int i = 0; i < blockKeySizeWithComplexTypes.length; i++) {
-      blockKeySizeWithComplexTypes[i] = blockKeySizeWithComplex.get(i);
-    }
-
-    return blockKeySizeWithComplexTypes;
   }

   /**
    * Below method will be used to get the fact data writer instance
    *
-   * @param keyBlockSize
    * @return data writer instance
    */
-  private CarbonFactDataWriter<?> getFactDataWriter(int[] keyBlockSize) {
--- End diff --

keyBlockSize is not used.
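For readers following the refactoring, the sketch below shows, in isolation, the configuration pattern from the hunk above: read a generic row-count default and override it when the V3 columnar format is in use. It is a minimal, hypothetical sketch; the property keys, default values, and class names are illustrative stand-ins, not the actual CarbonProperties/CarbonCommonConstants API.

// Illustrative sketch only: keys, defaults, and types below are hypothetical stand-ins,
// not CarbonData's CarbonProperties / CarbonCommonConstants API.
import java.util.Properties;

public class PageSizeThresholdSketch {

  enum FormatVersion { V1, V2, V3 }

  // Hypothetical defaults, for illustration only.
  private static final String DEFAULT_ROWS_PER_BLOCKLET = "120000";
  private static final String DEFAULT_V3_ROWS_PER_PAGE = "32000";

  /** Generic row-count default, overridden when the V3 columnar format is in use. */
  static int resolvePageSizeThreshold(Properties props, FormatVersion version) {
    int threshold = Integer.parseInt(
        props.getProperty("blocklet.size.rows", DEFAULT_ROWS_PER_BLOCKLET));
    if (version == FormatVersion.V3) {
      threshold = Integer.parseInt(
          props.getProperty("v3.rows.per.page", DEFAULT_V3_ROWS_PER_PAGE));
    }
    return threshold;
  }

  public static void main(String[] args) {
    System.out.println(resolvePageSizeThreshold(new Properties(), FormatVersion.V3)); // 32000
  }
}

The rename from blockletSize to pageSizeThreshold in the hunk appears to reflect the same idea: for V3 the value is a per-page row threshold rather than a blocklet row count.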
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1237#discussion_r131462351

--- Diff: processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java ---
@@ -644,10 +556,9 @@ private void resetBlockletProcessingCount() {
   }

   /**
-   * This class will hold the holder objects and manage producer and consumer for reading
-   * and writing the blocklet data
+   * This class will hold the encoded table pages.
    */
-  private final class BlockletDataHolder {
+  private final class TablePageList {
--- End diff --

This class only holds table pages (as many as the number of cores), not a blocklet, so it is renamed.
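To make the rename rationale concrete, here is a minimal, hypothetical Java sketch of a page holder whose capacity equals the number of producer cores. EncodedPage and TablePages are stand-ins invented for this sketch, not the actual EncodedTablePage or the renamed TablePageList.

// Illustrative sketch only: EncodedPage and TablePages are hypothetical stand-ins for
// CarbonData's EncodedTablePage and the renamed TablePageList; this is not the real API.
import java.util.ArrayList;
import java.util.List;

public class TablePagesSketch {

  /** Placeholder for one encoded column page. */
  static class EncodedPage {
    final int rowCount;
    EncodedPage(int rowCount) { this.rowCount = rowCount; }
  }

  /** Holds up to one in-flight page per producer core, not a whole blocklet. */
  static class TablePages {
    private final int capacity;
    private final List<EncodedPage> pages;

    TablePages(int numberOfCores) {
      this.capacity = numberOfCores;
      this.pages = new ArrayList<>(numberOfCores);
    }

    synchronized void add(EncodedPage page) {
      if (pages.size() >= capacity) {
        throw new IllegalStateException("already holding one page per core");
      }
      pages.add(page);
    }

    synchronized int size() { return pages.size(); }
  }

  public static void main(String[] args) {
    TablePages holder = new TablePages(Runtime.getRuntime().availableProcessors());
    holder.add(new EncodedPage(32000));
    System.out.println(holder.size()); // prints 1
  }
}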
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1237#discussion_r131462552

--- Diff: processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java ---
@@ -106,23 +106,13 @@ public CarbonFactDataWriterImplV3(CarbonDataWriterVo dataWriterVo) {
       throws CarbonDataWriterException {
     // condition for writting all the pages
     if (!encodedTablePage.isLastPage()) {
-      boolean isAdded = false;
       // check if size more than blocklet size then write the page to file
       if (dataWriterHolder.getSize() + encodedTablePage.getEncodedSize() >= blockletSize) {
-        // if one page size is more than blocklet size
-        if (dataWriterHolder.getEncodedTablePages().size() == 0) {
-          isAdded = true;
-          dataWriterHolder.addPage(encodedTablePage);
-        }
-
         LOGGER.info("Number of Pages for blocklet is: " + dataWriterHolder.getNumberOfPagesAdded()
             + " :Rows Added: " + dataWriterHolder.getTotalRows());
         // write the data
         writeBlockletToFile();
       }
-      if (!isAdded) {
-        dataWriterHolder.addPage(encodedTablePage);
--- End diff --

In both cases (isAdded true or false) `dataWriterHolder.addPage(encodedTablePage)` is invoked, so the flag and the duplicated call are removed.
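The simplification described in that comment can be seen in isolation in the hypothetical sketch below. Holder and Page are stand-ins for the real dataWriterHolder and EncodedTablePage, and the "after" version assumes the single unconditional addPage call (which is not visible in the quoted hunk) is placed after the flush.

// Illustrative sketch of collapsing a flag that only guards a duplicated call;
// Holder and Page are hypothetical stand-ins, not the actual writer API.
public class AddPageSimplificationSketch {

  interface Page { long encodedSize(); }

  interface Holder {
    long size();
    void addPage(Page page);
  }

  // Before: an isAdded flag guards a second addPage call, so the page is added on every path.
  static void addBefore(Holder holder, Page page, long blockletSize, Runnable writeBlocklet) {
    boolean isAdded = false;
    if (holder.size() + page.encodedSize() >= blockletSize) {
      if (holder.size() == 0) {   // a single page already exceeds the blocklet size
        isAdded = true;
        holder.addPage(page);
      }
      writeBlocklet.run();
    }
    if (!isAdded) {
      holder.addPage(page);
    }
  }

  // After: the page is added exactly once on every path, so the flag disappears.
  // (Assumption for this sketch: adding after the flush is acceptable; the hunk above
  // does not show where the remaining addPage call sits in the real code.)
  static void addAfter(Holder holder, Page page, long blockletSize, Runnable writeBlocklet) {
    if (holder.size() + page.encodedSize() >= blockletSize) {
      writeBlocklet.run();
    }
    holder.addPage(page);
  }
}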
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder/3380/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 Build Failed with Spark 1.6, Please check CI http://144.76.159.231:8080/job/ApacheCarbonPRBuilder/783/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 SDV Build Failed with Spark 2.1, Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/110/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 Build Success with Spark 1.6, Please check CI http://144.76.159.231:8080/job/ApacheCarbonPRBuilder/784/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder/3381/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 SDV Build Success with Spark 2.1, Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/111/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 SDV Build Failed with Spark 2.1, Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/112/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 Build Failed with Spark 1.6, Please check CI http://144.76.159.231:8080/job/ApacheCarbonPRBuilder/785/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder/3382/
Github user jackylk closed the pull request at:
https://github.com/apache/carbondata/pull/1237