GitHub user jackylk opened a pull request:
https://github.com/apache/carbondata/pull/1237

    Clean up CarbonFactDataHandlerColumnar

    Clean up CarbonFactDataHandlerColumnar and Writer to prepare for DataMap write logic.

You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/jackylk/incubator-carbondata isadded

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/carbondata/pull/1237.patch

To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message:

    This closes #1237

----
commit f07f15d5770b62965d46c96e3ecc084de36e15ce
Author: Jacky Li <[hidden email]>
Date:   2017-08-04T12:59:18Z

    remove unnecessary logic

commit bc0882ab2aeae804d1abd3b61d8125ebc9d2b15d
Author: Jacky Li <[hidden email]>
Date:   2017-08-04T18:40:52Z

    refactory

commit 9b2dfee73c07801dafe72ba118187895ccab3a88
Author: Jacky Li <[hidden email]>
Date:   2017-08-04T13:33:57Z

    clean up writer
----
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1237#discussion_r131462140

--- Diff: processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java ---
@@ -471,124 +450,57 @@ public void closeHandler() throws CarbonDataWriterException {
    */
   private void setWritingConfiguration() throws CarbonDataWriterException {
     // get blocklet size
-    this.blockletSize = Integer.parseInt(CarbonProperties.getInstance()
+    this.pageSizeThreshold = Integer.parseInt(CarbonProperties.getInstance()
         .getProperty(CarbonCommonConstants.BLOCKLET_SIZE,
             CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL));
     if (version == ColumnarFormatVersion.V3) {
-      this.blockletSize = Integer.parseInt(CarbonProperties.getInstance()
+      this.pageSizeThreshold = Integer.parseInt(CarbonProperties.getInstance()
          .getProperty(CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE,
              CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT));
     }
-    LOGGER.info("Number of rows per column blocklet " + blockletSize);
-    dataRows = new ArrayList<>(this.blockletSize);
-    int dimSet =
-        Integer.parseInt(CarbonCommonConstants.DIMENSION_SPLIT_VALUE_IN_COLUMNAR_DEFAULTVALUE);
-    // if atleast one dimension is present then initialize column splitter otherwise null
-    int noOfColStore = colGrpModel.getNoOfColumnStore();
-    int[] keyBlockSize = new int[noOfColStore + getExpandedComplexColsCount()];
-
+    LOGGER.info("Number of rows per column blocklet " + pageSizeThreshold);
+    dataRows = new ArrayList<>(this.pageSizeThreshold);
     if (model.getDimLens().length > 0) {
       //Using Variable length variable split generator
       //This will help in splitting mdkey to columns. variable split is required because all
       // columns which are part of
       //row store will be in single column store
       //e.g if {0,1,2,3,4,5} is dimension and {0,1,2) is row store dimension
       //than below splitter will return column as {0,1,2}{3}{4}{5}
-      this.columnarSplitter = model.getSegmentProperties().getFixedLengthKeySplitter();
-      System.arraycopy(columnarSplitter.getBlockKeySize(), 0, keyBlockSize, 0, noOfColStore);
+      ColumnarSplitter columnarSplitter = model.getSegmentProperties().getFixedLengthKeySplitter();
       this.keyBlockHolder =
-          new CarbonKeyBlockHolder[this.columnarSplitter.getBlockKeySize().length];
+          new CarbonKeyBlockHolder[columnarSplitter.getBlockKeySize().length];
     } else {
       this.keyBlockHolder = new CarbonKeyBlockHolder[0];
     }
     for (int i = 0; i < keyBlockHolder.length; i++) {
-      this.keyBlockHolder[i] = new CarbonKeyBlockHolder(blockletSize);
+      this.keyBlockHolder[i] = new CarbonKeyBlockHolder(pageSizeThreshold);
       this.keyBlockHolder[i].resetCounter();
     }
-    // agg type
-    List<Integer> otherMeasureIndexList =
-        new ArrayList<Integer>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
-    List<Integer> customMeasureIndexList =
-        new ArrayList<Integer>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
-    DataType[] type = model.getMeasureDataType();
-    for (int j = 0; j < type.length; j++) {
-      if (type[j] != DataType.BYTE && type[j] != DataType.DECIMAL) {
-        otherMeasureIndexList.add(j);
-      } else {
-        customMeasureIndexList.add(j);
-      }
-    }
-
-    int[] otherMeasureIndex = new int[otherMeasureIndexList.size()];
-    int[] customMeasureIndex = new int[customMeasureIndexList.size()];
-    for (int i = 0; i < otherMeasureIndex.length; i++) {
-      otherMeasureIndex[i] = otherMeasureIndexList.get(i);
-    }
-    for (int i = 0; i < customMeasureIndex.length; i++) {
-      customMeasureIndex[i] = customMeasureIndexList.get(i);
-    }
     setComplexMapSurrogateIndex(model.getDimensionCount());
-    int[] blockKeySize = getBlockKeySizeWithComplexTypes(new MultiDimKeyVarLengthEquiSplitGenerator(
-        CarbonUtil.getIncrementedCardinalityFullyFilled(model.getDimLens().clone()), (byte) dimSet)
-        .getBlockKeySize());
-    System.arraycopy(blockKeySize, noOfColStore, keyBlockSize, noOfColStore,
-        blockKeySize.length - noOfColStore);
-    this.dataWriter = getFactDataWriter(keyBlockSize);
+    this.dataWriter = getFactDataWriter();
     this.dataWriter.setIsNoDictionary(isNoDictionary);
     // initialize the channel;
     this.dataWriter.initializeWriter();
-    //initializeColGrpMinMax();
-  }
-
-  /**
-   * This method combines primitive dimensions with complex metadata columns
-   *
-   * @param primitiveBlockKeySize
-   * @return all dimensions cardinality including complex dimension metadata column
-   */
-  private int[] getBlockKeySizeWithComplexTypes(int[] primitiveBlockKeySize) {
-    int allColsCount = getExpandedComplexColsCount();
-    int[] blockKeySizeWithComplexTypes =
-        new int[this.colGrpModel.getNoOfColumnStore() + allColsCount];
-
-    List<Integer> blockKeySizeWithComplex =
-        new ArrayList<Integer>(blockKeySizeWithComplexTypes.length);
-    int dictDimensionCount = model.getDimensionCount();
-    for (int i = 0; i < dictDimensionCount; i++) {
-      GenericDataType complexDataType = model.getComplexIndexMap().get(i);
-      if (complexDataType != null) {
-        complexDataType.fillBlockKeySize(blockKeySizeWithComplex, primitiveBlockKeySize);
-      } else {
-        blockKeySizeWithComplex.add(primitiveBlockKeySize[i]);
-      }
-    }
-    for (int i = 0; i < blockKeySizeWithComplexTypes.length; i++) {
-      blockKeySizeWithComplexTypes[i] = blockKeySizeWithComplex.get(i);
-    }
-
-    return blockKeySizeWithComplexTypes;
   }

   /**
    * Below method will be used to get the fact data writer instance
    *
-   * @param keyBlockSize
    * @return data writer instance
    */
-  private CarbonFactDataWriter<?> getFactDataWriter(int[] keyBlockSize) {
--- End diff --

keyBlockSize is not used.
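For readers following the refactoring, the sketch below shows, in isolation, the configuration pattern from the hunk above: read a generic row-count default and override it when the V3 columnar format is in use. It is a minimal, hypothetical sketch; the property keys, default values, and class names are illustrative stand-ins, not the actual CarbonProperties/CarbonCommonConstants API.

// Illustrative sketch only: keys, defaults, and types below are hypothetical stand-ins,
// not CarbonData's CarbonProperties / CarbonCommonConstants API.
import java.util.Properties;

public class PageSizeThresholdSketch {

  enum FormatVersion { V1, V2, V3 }

  // Hypothetical defaults, for illustration only.
  private static final String DEFAULT_ROWS_PER_BLOCKLET = "120000";
  private static final String DEFAULT_V3_ROWS_PER_PAGE = "32000";

  /** Generic row-count default, overridden when the V3 columnar format is in use. */
  static int resolvePageSizeThreshold(Properties props, FormatVersion version) {
    int threshold = Integer.parseInt(
        props.getProperty("blocklet.size.rows", DEFAULT_ROWS_PER_BLOCKLET));
    if (version == FormatVersion.V3) {
      threshold = Integer.parseInt(
          props.getProperty("v3.rows.per.page", DEFAULT_V3_ROWS_PER_PAGE));
    }
    return threshold;
  }

  public static void main(String[] args) {
    System.out.println(resolvePageSizeThreshold(new Properties(), FormatVersion.V3)); // 32000
  }
}

The rename from blockletSize to pageSizeThreshold in the hunk appears to reflect the same idea: for V3 the value is a per-page row threshold rather than a blocklet row count.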
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1237#discussion_r131462351

--- Diff: processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java ---
@@ -644,10 +556,9 @@ private void resetBlockletProcessingCount() {
   }

   /**
-   * This class will hold the holder objects and manage producer and consumer for reading
-   * and writing the blocklet data
+   * This class will hold the encoded table pages.
    */
-  private final class BlockletDataHolder {
+  private final class TablePageList {
--- End diff --

This class only holds table pages (as many as the number of cores), not a blocklet, so it is renamed.
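To make the rename rationale concrete, here is a minimal, hypothetical Java sketch of a page holder whose capacity equals the number of producer cores. EncodedPage and TablePages are stand-ins invented for this sketch, not the actual EncodedTablePage or the renamed TablePageList.

// Illustrative sketch only: EncodedPage and TablePages are hypothetical stand-ins for
// CarbonData's EncodedTablePage and the renamed TablePageList; this is not the real API.
import java.util.ArrayList;
import java.util.List;

public class TablePagesSketch {

  /** Placeholder for one encoded column page. */
  static class EncodedPage {
    final int rowCount;
    EncodedPage(int rowCount) { this.rowCount = rowCount; }
  }

  /** Holds up to one in-flight page per producer core, not a whole blocklet. */
  static class TablePages {
    private final int capacity;
    private final List<EncodedPage> pages;

    TablePages(int numberOfCores) {
      this.capacity = numberOfCores;
      this.pages = new ArrayList<>(numberOfCores);
    }

    synchronized void add(EncodedPage page) {
      if (pages.size() >= capacity) {
        throw new IllegalStateException("already holding one page per core");
      }
      pages.add(page);
    }

    synchronized int size() { return pages.size(); }
  }

  public static void main(String[] args) {
    TablePages holder = new TablePages(Runtime.getRuntime().availableProcessors());
    holder.add(new EncodedPage(32000));
    System.out.println(holder.size()); // prints 1
  }
}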
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1237#discussion_r131462552

--- Diff: processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java ---
@@ -106,23 +106,13 @@ public CarbonFactDataWriterImplV3(CarbonDataWriterVo dataWriterVo) {
       throws CarbonDataWriterException {
     // condition for writting all the pages
     if (!encodedTablePage.isLastPage()) {
-      boolean isAdded = false;
       // check if size more than blocklet size then write the page to file
       if (dataWriterHolder.getSize() + encodedTablePage.getEncodedSize() >= blockletSize) {
-        // if one page size is more than blocklet size
-        if (dataWriterHolder.getEncodedTablePages().size() == 0) {
-          isAdded = true;
-          dataWriterHolder.addPage(encodedTablePage);
-        }
-
         LOGGER.info("Number of Pages for blocklet is: " + dataWriterHolder.getNumberOfPagesAdded()
             + " :Rows Added: " + dataWriterHolder.getTotalRows());
         // write the data
         writeBlockletToFile();
       }
-      if (!isAdded) {
-        dataWriterHolder.addPage(encodedTablePage);
--- End diff --

In both cases (isAdded true or false) `dataWriterHolder.addPage(encodedTablePage)` is invoked, so the flag and the duplicated call are removed.
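The simplification described in that comment can be seen in isolation in the hypothetical sketch below. Holder and Page are stand-ins for the real dataWriterHolder and EncodedTablePage, and the "after" version assumes the single unconditional addPage call (which is not visible in the quoted hunk) is placed after the flush.

// Illustrative sketch of collapsing a flag that only guards a duplicated call;
// Holder and Page are hypothetical stand-ins, not the actual writer API.
public class AddPageSimplificationSketch {

  interface Page { long encodedSize(); }

  interface Holder {
    long size();
    void addPage(Page page);
  }

  // Before: an isAdded flag guards a second addPage call, so the page is added on every path.
  static void addBefore(Holder holder, Page page, long blockletSize, Runnable writeBlocklet) {
    boolean isAdded = false;
    if (holder.size() + page.encodedSize() >= blockletSize) {
      if (holder.size() == 0) {   // a single page already exceeds the blocklet size
        isAdded = true;
        holder.addPage(page);
      }
      writeBlocklet.run();
    }
    if (!isAdded) {
      holder.addPage(page);
    }
  }

  // After: the page is added exactly once on every path, so the flag disappears.
  // (Assumption for this sketch: adding after the flush is acceptable; the hunk above
  // does not show where the remaining addPage call sits in the real code.)
  static void addAfter(Holder holder, Page page, long blockletSize, Runnable writeBlocklet) {
    if (holder.size() + page.encodedSize() >= blockletSize) {
      writeBlocklet.run();
    }
    holder.addPage(page);
  }
}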
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder/3380/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 Build Failed with Spark 1.6, Please check CI http://144.76.159.231:8080/job/ApacheCarbonPRBuilder/783/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 SDV Build Failed with Spark 2.1, Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/110/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 Build Success with Spark 1.6, Please check CI http://144.76.159.231:8080/job/ApacheCarbonPRBuilder/784/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder/3381/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 SDV Build Success with Spark 2.1, Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/111/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 SDV Build Failed with Spark 2.1, Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/112/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 Build Failed with Spark 1.6, Please check CI http://144.76.159.231:8080/job/ApacheCarbonPRBuilder/785/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1237 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder/3382/
Github user jackylk closed the pull request at:
https://github.com/apache/carbondata/pull/1237