GitHub user sounakr opened a pull request:
https://github.com/apache/incubator-carbondata/pull/670 [CARBONDATA-792] Range Filter Optimization Problem : LessThan and GreaterThan Expressions are merged into a single RANGE expression. Analysis : If LessThan and GreaterThan Expressions on the same column are given as filter predicates, then each expression is evaluated independently and finally an AND operator is applied on the individual results to get the final output. Fix : If the LessThan and GreaterThan Expression values can form a range, then they are converted into a RANGE expression and the results are evaluated through a single expression, i.e. if the value lies within the range then it is chosen. This fix will optimize block pruning and reduce scanning time in case queries have range expressions in the filter. You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata range1 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/incubator-carbondata/pull/670.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #670 ---- commit e77cc5a84e9d6313383cb6347f78846f3319442e Author: sounakr <[hidden email]> Date: 2017-03-15T16:06:16Z [CARBONDATA-792] Range Filter Optimization ---- --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
Github user CarbonDataQA commented on the issue:
https://github.com/apache/incubator-carbondata/pull/670 Build Success with Spark 1.6.2, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder/1222/ --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106798401 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/expression/BinaryExpression.java --- @@ -38,4 +38,17 @@ public Expression getRight() { return right; } + @Override public void setChildren(Expression oldExpr, Expression newExpr) { --- End diff -- Change this function name to findAndSetChild. Also implement this method in **all** non-leaf nodes, like ListExpression. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106799973 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { --- End diff -- add Class header --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106798507 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/expression/Expression.java --- @@ -42,6 +42,8 @@ public abstract ExpressionResult evaluate(RowIntf value) return children; } + public abstract void setChildren(Expression oldExpr, Expression newExpr); --- End diff -- write method comment --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106799796 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); --- End diff -- Move this to constructor and fill all of them at one shot --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106799773 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + boolean greaterThanEqualTo = isGreaterThanEqualTo(); + boolean lessThanEqualTo = isLessThanEqualTo(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + int filterMaxLessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[1]); + + int filterMinGreaterThanBlockMax = --- End diff -- directly move this to condition --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106799714 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + boolean greaterThanEqualTo = isGreaterThanEqualTo(); + boolean lessThanEqualTo = isLessThanEqualTo(); + + int filterMinlessThanBlockMin = --- End diff -- remove this not used --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106799658 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + boolean greaterThanEqualTo = isGreaterThanEqualTo(); + boolean lessThanEqualTo = isLessThanEqualTo(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + int filterMaxLessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[1]); + + int filterMinGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[0], blockMaxValue); + int filterMaxGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[1], blockMaxValue); + + // if any filter value is in range than this block needs to be + // scanned less than equal to max range. + + if (isDimensionPresentInCurrentBlock == true) { --- End diff -- This check is for the alter table case; please write that in a comment. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106800910 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + boolean greaterThanEqualTo = isGreaterThanEqualTo(); + boolean lessThanEqualTo = isLessThanEqualTo(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + int filterMaxLessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[1]); + + int filterMinGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[0], blockMaxValue); + int filterMaxGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[1], blockMaxValue); + + // if any filter value is in range than this block needs to be + // scanned less than equal to max range. + + if (isDimensionPresentInCurrentBlock == true) { + if (((lessThan == true) && (filterMaxLessThanBlockMin >= 0)) || + ((lessThanEqualTo == true) && (filterMaxLessThanBlockMin > 0)) || + ((greaterThan == true) && (filterMinGreaterThanBlockMax >= 0)) || + ((greaterThanEqualTo == true) && (filterMinGreaterThanBlockMax > 0))) { + isScanRequired = false; + } + return isScanRequired; + } else { + return isDefaultValuePresentInFilter; + } + } + + /** + * Method checks is the scan lies within the range values or not. 
+ * @param blockMaxValue + * @param blockMinValue + * @return + */ + @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { + BitSet bitSet = new BitSet(1); + byte[][] filterValues = this.filterRangesValues; + int columnIndex = this.dimColEvaluatorInfo.getColumnIndex(); + boolean isScanRequired = + isScanRequired(blockMinValue[columnIndex], blockMaxValue[columnIndex], filterValues); + if (isScanRequired) { + bitSet.set(0); + } + return bitSet; + } + + /** + * Method to apply the Range Filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyNoAndDirectFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + + // select all rows if dimension does not exists in the current block + if (!isDimensionPresentInCurrentBlock) { + int numberOfRows = blockChunkHolder.getDataBlock().nodeSize(); + return FilterUtil + .createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), + numberOfRows, true); + } + + int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimColEvaluatorInfo.getColumnIndex()); + + if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { + blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() + .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); + } + + DimensionRawColumnChunk rawColumnChunk = + blockChunkHolder.getDimensionRawDataChunk()[blockIndex]; + BitSetGroup bitSetGroup = new BitSetGroup(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMaxValues() != null) { + if (isScanRequired(rawColumnChunk.getMinValues()[i], rawColumnChunk.getMaxValues()[i], + this.filterRangesValues)) { + if (isRangeFullyCovered(rawColumnChunk.getMinValues()[i], + rawColumnChunk.getMaxValues()[i], this.filterRangesValues)) { + // Set all the bits 
in this case as filter Min Max values cover the whole block. + BitSet bitSet = new BitSet(rawColumnChunk.getRowCount()[i]); + bitSet.flip(0, rawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } else { + BitSet bitSet = getFilteredIndexes(rawColumnChunk.convertToDimColDataChunk(i), + rawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } + } + } else { + BitSet bitSet = getFilteredIndexes(rawColumnChunk.convertToDimColDataChunk(i), + rawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } + } + return bitSetGroup; + } + + /** + * Method to verify if the block is fully covered by Filter Min and Max. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + private boolean isRangeFullyCovered(byte[] blockMinValue, byte[] blockMaxValue, + byte[][] filterValues) { + boolean blockWithinRange = false; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + + int filterMaxGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[1], blockMaxValue); + + if ((((greaterThan == true) && (filterMinlessThanBlockMin > 0)) || ((greaterThan == false) && ( + filterMinlessThanBlockMin >= 0))) && ( + ((lessThan == true) && (filterMaxGreaterThanBlockMax > 0)) || ((lessThan == false) && ( + filterMaxGreaterThanBlockMax >= 0)))) { + blockWithinRange = true; + } + return blockWithinRange; + } + + private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk, + int numerOfRows) { + if (dimensionColumnDataChunk.isExplicitSorted() + && dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) { + return setFilterdIndexToBitSetWithColumnIndex( + (FixedLengthDimensionDataChunk) dimensionColumnDataChunk, numerOfRows); + } + return setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows); + } + + /** + * Method 
will scan the block and finds the range start index from which all members + * will be considered for applying range filters. this method will be called if the + * column is not supported by default so column index mapping will be present for + * accesing the members from the block. + * + * @param dimensionColumnDataChunk + * @param numerOfRows + * @return BitSet. + */ + private BitSet setFilterdIndexToBitSetWithColumnIndex( + FixedLengthDimensionDataChunk dimensionColumnDataChunk, int numerOfRows) { + BitSet bitSet = new BitSet(numerOfRows); + int start = 0; + int startIndex = 0; + int startMin = 0; + int endMax = 0; + byte[][] filterValues = this.filterRangesValues; + + // For Range expression we expect two values. The First is the Min Value and Second is the + // Max value. + // Get the Min Value + start = CarbonUtil + .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1, + filterValues[0], false); + if (isGreaterThan() == true && start >= 0) { + start = CarbonUtil + .nextGreaterValueToTarget(start, dimensionColumnDataChunk, filterValues[0], numerOfRows); + } + + if (start < 0) { + start = -(start + 1); + if (start == numerOfRows) { + start = start - 1; + } + // Method will compare the tentative index value after binary search, this tentative + // index needs to be compared by the filter member if its >= filter then from that + // index the bitset will be considered for filtering process. 
+ if ((isGreaterThan() == true) && (ByteUtil.compare(filterValues[0], + dimensionColumnDataChunk.getChunkData(dimensionColumnDataChunk.getInvertedIndex(start)))) + > 0) { + start = start + 1; + } else if ((isGreaterThanEqualTo() == true) && (ByteUtil.compare(filterValues[0], + dimensionColumnDataChunk.getChunkData(dimensionColumnDataChunk.getInvertedIndex(start)))) + >= 0) { + start = start + 1; + } + } + startMin = start; + + // Get the Max value + start = CarbonUtil + .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1, + filterValues[1], false); + if (isLessThan() == true && start >= 0) { + start = CarbonUtil.nextLesserValueToTarget(start, dimensionColumnDataChunk, filterValues[1]); + } + + if (start < 0) { + start = -(start + 1); + if (start == numerOfRows) { + start = start - 1; + } + // Method will compare the tentative index value after binary search, this tentative + // index needs to be compared by the filter member if its >= filter then from that + // index the bitset will be considered for filtering process. + if ((isLessThan() == true) && (ByteUtil.compare(filterValues[1], + dimensionColumnDataChunk.getChunkData(dimensionColumnDataChunk.getInvertedIndex(start))) + < 0)) { + start = start - 1; + } else if ((isLessThanEqualTo() == true) && (ByteUtil.compare(filterValues[1], + dimensionColumnDataChunk.getChunkData(dimensionColumnDataChunk.getInvertedIndex(start))) + <= 0)) { + start = start - 1; + } + } + endMax = start; + + for (int j = startMin; j <= endMax; j++) { + bitSet.set(dimensionColumnDataChunk.getInvertedIndex(j)); + } + + if (dimensionColumnDataChunk.isNoDicitionaryColumn()) { --- End diff -- Binary search cannot be done on "@NU#LL$!", so need to check and compare for null on matching rows. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. 
If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106800606 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + boolean greaterThanEqualTo = isGreaterThanEqualTo(); + boolean lessThanEqualTo = isLessThanEqualTo(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + int filterMaxLessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[1]); + + int filterMinGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[0], blockMaxValue); + int filterMaxGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[1], blockMaxValue); + + // if any filter value is in range than this block needs to be + // scanned less than equal to max range. + + if (isDimensionPresentInCurrentBlock == true) { + if (((lessThan == true) && (filterMaxLessThanBlockMin >= 0)) || + ((lessThanEqualTo == true) && (filterMaxLessThanBlockMin > 0)) || + ((greaterThan == true) && (filterMinGreaterThanBlockMax >= 0)) || + ((greaterThanEqualTo == true) && (filterMinGreaterThanBlockMax > 0))) { + isScanRequired = false; + } + return isScanRequired; + } else { + return isDefaultValuePresentInFilter; + } + } + + /** + * Method checks is the scan lies within the range values or not. 
+ * @param blockMaxValue + * @param blockMinValue + * @return + */ + @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { + BitSet bitSet = new BitSet(1); + byte[][] filterValues = this.filterRangesValues; + int columnIndex = this.dimColEvaluatorInfo.getColumnIndex(); + boolean isScanRequired = + isScanRequired(blockMinValue[columnIndex], blockMaxValue[columnIndex], filterValues); + if (isScanRequired) { + bitSet.set(0); + } + return bitSet; + } + + /** + * Method to apply the Range Filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyNoAndDirectFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + + // select all rows if dimension does not exists in the current block + if (!isDimensionPresentInCurrentBlock) { + int numberOfRows = blockChunkHolder.getDataBlock().nodeSize(); + return FilterUtil + .createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), + numberOfRows, true); + } + + int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimColEvaluatorInfo.getColumnIndex()); + + if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { + blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() + .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); + } + + DimensionRawColumnChunk rawColumnChunk = + blockChunkHolder.getDimensionRawDataChunk()[blockIndex]; + BitSetGroup bitSetGroup = new BitSetGroup(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMaxValues() != null) { + if (isScanRequired(rawColumnChunk.getMinValues()[i], rawColumnChunk.getMaxValues()[i], + this.filterRangesValues)) { + if (isRangeFullyCovered(rawColumnChunk.getMinValues()[i], + rawColumnChunk.getMaxValues()[i], this.filterRangesValues)) { + // Set all the bits 
in this case as filter Min Max values cover the whole block. + BitSet bitSet = new BitSet(rawColumnChunk.getRowCount()[i]); + bitSet.flip(0, rawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } else { + BitSet bitSet = getFilteredIndexes(rawColumnChunk.convertToDimColDataChunk(i), + rawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } + } + } else { + BitSet bitSet = getFilteredIndexes(rawColumnChunk.convertToDimColDataChunk(i), + rawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } + } + return bitSetGroup; + } + + /** + * Method to verify if the block is fully covered by Filter Min and Max. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + private boolean isRangeFullyCovered(byte[] blockMinValue, byte[] blockMaxValue, + byte[][] filterValues) { + boolean blockWithinRange = false; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + + int filterMaxGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[1], blockMaxValue); + + if ((((greaterThan == true) && (filterMinlessThanBlockMin > 0)) || ((greaterThan == false) && ( + filterMinlessThanBlockMin >= 0))) && ( + ((lessThan == true) && (filterMaxGreaterThanBlockMax > 0)) || ((lessThan == false) && ( + filterMaxGreaterThanBlockMax >= 0)))) { + blockWithinRange = true; + } + return blockWithinRange; + } + + private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk, + int numerOfRows) { + if (dimensionColumnDataChunk.isExplicitSorted() + && dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) { + return setFilterdIndexToBitSetWithColumnIndex( + (FixedLengthDimensionDataChunk) dimensionColumnDataChunk, numerOfRows); + } + return setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows); + } + + /** + * Method 
will scan the block and finds the range start index from which all members + * will be considered for applying range filters. this method will be called if the + * column is not supported by default so column index mapping will be present for + * accesing the members from the block. + * + * @param dimensionColumnDataChunk + * @param numerOfRows + * @return BitSet. + */ + private BitSet setFilterdIndexToBitSetWithColumnIndex( + FixedLengthDimensionDataChunk dimensionColumnDataChunk, int numerOfRows) { + BitSet bitSet = new BitSet(numerOfRows); + int start = 0; + int startIndex = 0; + int startMin = 0; + int endMax = 0; + byte[][] filterValues = this.filterRangesValues; + + // For Range expression we expect two values. The First is the Min Value and Second is the + // Max value. + // Get the Min Value + start = CarbonUtil + .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1, + filterValues[0], false); --- End diff -- Change the flag passed to the binary search (currently hard-coded to false) based on the greater-than case. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106800320 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + boolean greaterThanEqualTo = isGreaterThanEqualTo(); + boolean lessThanEqualTo = isLessThanEqualTo(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + int filterMaxLessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[1]); + + int filterMinGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[0], blockMaxValue); + int filterMaxGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[1], blockMaxValue); + + // if any filter value is in range than this block needs to be + // scanned less than equal to max range. + + if (isDimensionPresentInCurrentBlock == true) { + if (((lessThan == true) && (filterMaxLessThanBlockMin >= 0)) || + ((lessThanEqualTo == true) && (filterMaxLessThanBlockMin > 0)) || + ((greaterThan == true) && (filterMinGreaterThanBlockMax >= 0)) || + ((greaterThanEqualTo == true) && (filterMinGreaterThanBlockMax > 0))) { + isScanRequired = false; + } + return isScanRequired; + } else { + return isDefaultValuePresentInFilter; + } + } + + /** + * Method checks is the scan lies within the range values or not. 
+ * @param blockMaxValue + * @param blockMinValue + * @return + */ + @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { + BitSet bitSet = new BitSet(1); + byte[][] filterValues = this.filterRangesValues; + int columnIndex = this.dimColEvaluatorInfo.getColumnIndex(); + boolean isScanRequired = + isScanRequired(blockMinValue[columnIndex], blockMaxValue[columnIndex], filterValues); + if (isScanRequired) { + bitSet.set(0); + } + return bitSet; + } + + /** + * Method to apply the Range Filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyNoAndDirectFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + + // select all rows if dimension does not exists in the current block + if (!isDimensionPresentInCurrentBlock) { + int numberOfRows = blockChunkHolder.getDataBlock().nodeSize(); + return FilterUtil + .createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), + numberOfRows, true); + } + + int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimColEvaluatorInfo.getColumnIndex()); + + if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { + blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() + .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); + } + + DimensionRawColumnChunk rawColumnChunk = + blockChunkHolder.getDimensionRawDataChunk()[blockIndex]; + BitSetGroup bitSetGroup = new BitSetGroup(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMaxValues() != null) { + if (isScanRequired(rawColumnChunk.getMinValues()[i], rawColumnChunk.getMaxValues()[i], + this.filterRangesValues)) { + if (isRangeFullyCovered(rawColumnChunk.getMinValues()[i], + rawColumnChunk.getMaxValues()[i], this.filterRangesValues)) { + // Set all the bits 
in this case as filter Min Max values cover the whole block. + BitSet bitSet = new BitSet(rawColumnChunk.getRowCount()[i]); + bitSet.flip(0, rawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } else { + BitSet bitSet = getFilteredIndexes(rawColumnChunk.convertToDimColDataChunk(i), + rawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } + } + } else { + BitSet bitSet = getFilteredIndexes(rawColumnChunk.convertToDimColDataChunk(i), + rawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } + } + return bitSetGroup; + } + + /** + * Method to verify if the block is fully covered by Filter Min and Max. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + private boolean isRangeFullyCovered(byte[] blockMinValue, byte[] blockMaxValue, + byte[][] filterValues) { + boolean blockWithinRange = false; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + + int filterMaxGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[1], blockMaxValue); + + if ((((greaterThan == true) && (filterMinlessThanBlockMin > 0)) || ((greaterThan == false) && ( + filterMinlessThanBlockMin >= 0))) && ( + ((lessThan == true) && (filterMaxGreaterThanBlockMax > 0)) || ((lessThan == false) && ( + filterMaxGreaterThanBlockMax >= 0)))) { + blockWithinRange = true; + } + return blockWithinRange; + } + + private BitSet getFilteredIndexes(DimensionColumnDataChunk dimensionColumnDataChunk, + int numerOfRows) { + if (dimensionColumnDataChunk.isExplicitSorted() + && dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) { + return setFilterdIndexToBitSetWithColumnIndex( + (FixedLengthDimensionDataChunk) dimensionColumnDataChunk, numerOfRows); + } + return setFilterdIndexToBitSet(dimensionColumnDataChunk, numerOfRows); + } + + /** + * Method 
will scan the block and finds the range start index from which all members + * will be considered for applying range filters. this method will be called if the + * column is not supported by default so column index mapping will be present for + * accesing the members from the block. + * + * @param dimensionColumnDataChunk + * @param numerOfRows + * @return BitSet. + */ + private BitSet setFilterdIndexToBitSetWithColumnIndex( + FixedLengthDimensionDataChunk dimensionColumnDataChunk, int numerOfRows) { + BitSet bitSet = new BitSet(numerOfRows); + int start = 0; + int startIndex = 0; + int startMin = 0; + int endMax = 0; + byte[][] filterValues = this.filterRangesValues; + + // For Range expression we expect two values. The First is the Min Value and Second is the + // Max value. + // Get the Min Value + start = CarbonUtil + .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows - 1, --- End diff -- If the start value is greater than min, then no binary search is required on the min value. Also write a comment on the getFirstIndexUsingBinarySearch function definition. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106800228 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + boolean greaterThanEqualTo = isGreaterThanEqualTo(); + boolean lessThanEqualTo = isLessThanEqualTo(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + int filterMaxLessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[1]); + + int filterMinGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[0], blockMaxValue); + int filterMaxGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[1], blockMaxValue); + + // if any filter value is in range than this block needs to be + // scanned less than equal to max range. + + if (isDimensionPresentInCurrentBlock == true) { + if (((lessThan == true) && (filterMaxLessThanBlockMin >= 0)) || + ((lessThanEqualTo == true) && (filterMaxLessThanBlockMin > 0)) || + ((greaterThan == true) && (filterMinGreaterThanBlockMax >= 0)) || + ((greaterThanEqualTo == true) && (filterMinGreaterThanBlockMax > 0))) { + isScanRequired = false; + } + return isScanRequired; + } else { + return isDefaultValuePresentInFilter; + } + } + + /** + * Method checks is the scan lies within the range values or not. 
+ * @param blockMaxValue + * @param blockMinValue + * @return + */ + @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { + BitSet bitSet = new BitSet(1); + byte[][] filterValues = this.filterRangesValues; + int columnIndex = this.dimColEvaluatorInfo.getColumnIndex(); + boolean isScanRequired = + isScanRequired(blockMinValue[columnIndex], blockMaxValue[columnIndex], filterValues); + if (isScanRequired) { + bitSet.set(0); + } + return bitSet; + } + + /** + * Method to apply the Range Filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyNoAndDirectFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + + // select all rows if dimension does not exists in the current block + if (!isDimensionPresentInCurrentBlock) { + int numberOfRows = blockChunkHolder.getDataBlock().nodeSize(); + return FilterUtil + .createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), + numberOfRows, true); + } + + int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimColEvaluatorInfo.getColumnIndex()); + + if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { + blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() + .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); + } + + DimensionRawColumnChunk rawColumnChunk = + blockChunkHolder.getDimensionRawDataChunk()[blockIndex]; + BitSetGroup bitSetGroup = new BitSetGroup(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMaxValues() != null) { + if (isScanRequired(rawColumnChunk.getMinValues()[i], rawColumnChunk.getMaxValues()[i], + this.filterRangesValues)) { + if (isRangeFullyCovered(rawColumnChunk.getMinValues()[i], + rawColumnChunk.getMaxValues()[i], this.filterRangesValues)) { + // Set all the bits 
in this case as filter Min Max values cover the whole block. + BitSet bitSet = new BitSet(rawColumnChunk.getRowCount()[i]); + bitSet.flip(0, rawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } else { + BitSet bitSet = getFilteredIndexes(rawColumnChunk.convertToDimColDataChunk(i), + rawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } + } + } else { + BitSet bitSet = getFilteredIndexes(rawColumnChunk.convertToDimColDataChunk(i), + rawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } + } + return bitSetGroup; + } + + /** + * Method to verify if the block is fully covered by Filter Min and Max. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + private boolean isRangeFullyCovered(byte[] blockMinValue, byte[] blockMaxValue, + byte[][] filterValues) { + boolean blockWithinRange = false; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + + int filterMaxGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[1], blockMaxValue); + + if ((((greaterThan == true) && (filterMinlessThanBlockMin > 0)) || ((greaterThan == false) && ( --- End diff -- Move check into condition --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
GitHub user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106800118 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + boolean greaterThanEqualTo = isGreaterThanEqualTo(); + boolean lessThanEqualTo = isLessThanEqualTo(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + int filterMaxLessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[1]); + + int filterMinGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[0], blockMaxValue); + int filterMaxGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[1], blockMaxValue); + + // if any filter value is in range than this block needs to be + // scanned less than equal to max range. + + if (isDimensionPresentInCurrentBlock == true) { + if (((lessThan == true) && (filterMaxLessThanBlockMin >= 0)) || + ((lessThanEqualTo == true) && (filterMaxLessThanBlockMin > 0)) || + ((greaterThan == true) && (filterMinGreaterThanBlockMax >= 0)) || + ((greaterThanEqualTo == true) && (filterMinGreaterThanBlockMax > 0))) { + isScanRequired = false; + } + return isScanRequired; + } else { + return isDefaultValuePresentInFilter; + } + } + + /** + * Method checks is the scan lies within the range values or not. 
+ * @param blockMaxValue + * @param blockMinValue + * @return + */ + @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { + BitSet bitSet = new BitSet(1); + byte[][] filterValues = this.filterRangesValues; + int columnIndex = this.dimColEvaluatorInfo.getColumnIndex(); + boolean isScanRequired = + isScanRequired(blockMinValue[columnIndex], blockMaxValue[columnIndex], filterValues); + if (isScanRequired) { + bitSet.set(0); + } + return bitSet; + } + + /** + * Method to apply the Range Filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyNoAndDirectFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + + // select all rows if dimension does not exists in the current block + if (!isDimensionPresentInCurrentBlock) { + int numberOfRows = blockChunkHolder.getDataBlock().nodeSize(); + return FilterUtil + .createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), + numberOfRows, true); + } + + int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimColEvaluatorInfo.getColumnIndex()); + + if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { + blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() + .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); + } + --- End diff -- Use AllSetBitset in case complete blocklet is selected, using blocklet min max --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
GitHub user gvramana commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106800140 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + boolean greaterThanEqualTo = isGreaterThanEqualTo(); + boolean lessThanEqualTo = isLessThanEqualTo(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + int filterMaxLessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[1]); + + int filterMinGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[0], blockMaxValue); + int filterMaxGreaterThanBlockMax = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[1], blockMaxValue); + + // if any filter value is in range than this block needs to be + // scanned less than equal to max range. + + if (isDimensionPresentInCurrentBlock == true) { + if (((lessThan == true) && (filterMaxLessThanBlockMin >= 0)) || + ((lessThanEqualTo == true) && (filterMaxLessThanBlockMin > 0)) || + ((greaterThan == true) && (filterMinGreaterThanBlockMax >= 0)) || + ((greaterThanEqualTo == true) && (filterMinGreaterThanBlockMax > 0))) { + isScanRequired = false; + } + return isScanRequired; + } else { + return isDefaultValuePresentInFilter; + } + } + + /** + * Method checks is the scan lies within the range values or not. 
+ * @param blockMaxValue + * @param blockMinValue + * @return + */ + @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { + BitSet bitSet = new BitSet(1); + byte[][] filterValues = this.filterRangesValues; + int columnIndex = this.dimColEvaluatorInfo.getColumnIndex(); + boolean isScanRequired = + isScanRequired(blockMinValue[columnIndex], blockMaxValue[columnIndex], filterValues); + if (isScanRequired) { + bitSet.set(0); + } + return bitSet; + } + + /** + * Method to apply the Range Filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyNoAndDirectFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + + // select all rows if dimension does not exists in the current block + if (!isDimensionPresentInCurrentBlock) { + int numberOfRows = blockChunkHolder.getDataBlock().nodeSize(); + return FilterUtil + .createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), + numberOfRows, true); + } + + int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimColEvaluatorInfo.getColumnIndex()); + + if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { + blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() + .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); + } + + DimensionRawColumnChunk rawColumnChunk = + blockChunkHolder.getDimensionRawDataChunk()[blockIndex]; + BitSetGroup bitSetGroup = new BitSetGroup(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMaxValues() != null) { + if (isScanRequired(rawColumnChunk.getMinValues()[i], rawColumnChunk.getMaxValues()[i], + this.filterRangesValues)) { + if (isRangeFullyCovered(rawColumnChunk.getMinValues()[i], + rawColumnChunk.getMaxValues()[i], this.filterRangesValues)) { + // Set all the bits 
in this case as filter Min Max values cover the whole block. + BitSet bitSet = new BitSet(rawColumnChunk.getRowCount()[i]); + bitSet.flip(0, rawColumnChunk.getRowCount()[i]); --- End diff -- check if 0 to be set or 1 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
GitHub user sounakr commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r106858685 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/expression/BinaryExpression.java --- @@ -38,4 +38,17 @@ public Expression getRight() { return right; } + @Override public void setChildren(Expression oldExpr, Expression newExpr) { --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
GitHub user CarbonDataQA commented on the issue:
https://github.com/apache/incubator-carbondata/pull/670 Build succeeded with Spark 1.6.2. Please check CI: http://136.243.101.176:8080/job/ApacheCarbonPRBuilder/1242/ --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
GitHub user sounakr commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r107077246 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + boolean greaterThanEqualTo = isGreaterThanEqualTo(); + boolean lessThanEqualTo = isLessThanEqualTo(); + + int filterMinlessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[0]); + int filterMaxLessThanBlockMin = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(blockMinValue, filterValues[1]); + + int filterMinGreaterThanBlockMax = --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
GitHub user sounakr commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r107077923 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
In reply to this post by qiuchenjian-2
GitHub user sounakr commented on a diff in the pull request:
https://github.com/apache/incubator-carbondata/pull/670#discussion_r107077907 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java --- @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk; +import org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; +import org.apache.carbondata.core.util.BitSetGroup; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.CarbonUtil; + +public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl { + + private DimColumnResolvedFilterInfo dimColEvaluatorInfo; + private MeasureColumnResolvedFilterInfo msrColEvalutorInfo; + private AbsoluteTableIdentifier tableIdentifier; + 
private Expression exp; + private byte[][] filterRangesValues; + private SegmentProperties segmentProperties; + private boolean isDefaultValuePresentInFilter; + /** + * it has index at which given dimension is stored in file + */ + protected int dimensionBlocksIndex; + + /** + * flag to check whether the filter dimension is present in current block list of dimensions. + * Applicable for restructure scenarios + */ + protected boolean isDimensionPresentInCurrentBlock; + + public RangeValueFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, + MeasureColumnResolvedFilterInfo msrColEvaluatorInfo, Expression exp, + AbsoluteTableIdentifier tableIdentifier, byte[][] filterRangeValues, + SegmentProperties segmentProperties) { + + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + this.msrColEvalutorInfo = msrColEvaluatorInfo; + this.exp = exp; + this.segmentProperties = segmentProperties; + this.tableIdentifier = tableIdentifier; + this.filterRangesValues = filterRangeValues; + initDimensionBlockIndexes(); + ifDefaultValueMatchesFilter(); + } + + /** + * This method will initialize the dimension info for the current block to be + * used for filtering the data + */ + private void initDimensionBlockIndexes() { + // find the dimension in the current block dimensions list + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); + if (null != dimensionFromCurrentBlock) { + dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); + this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimensionFromCurrentBlock.getOrdinal()); + isDimensionPresentInCurrentBlock = true; + } + } + + /** + * This method will check whether default value is present in the given filter values + */ + private void ifDefaultValueMatchesFilter() { + isDefaultValuePresentInFilter = false; + if (this.isDimensionPresentInCurrentBlock) { + CarbonDimension dimension = 
this.dimColEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (null != defaultValue) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterRangesValues[0]); + int minCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangesValues[1], defaultValue); + + if (((isGreaterThan() && maxCompare > 0) || (isGreaterThanEqualTo() && maxCompare >= 0)) + && ((isLessThan() && minCompare > 0) || (isLessThanEqualTo() && minCompare >= 0))) { + isDefaultValuePresentInFilter = true; + } + } + } + } + + /** + * Method to apply the filter. + * @param blockChunkHolder + * @return + * @throws FilterUnsupportedException + * @throws IOException + */ + public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) + throws FilterUnsupportedException, IOException { + return applyNoAndDirectFilter(blockChunkHolder); + } + + /** + * Method to find presence of LessThan Expression. + * @return + */ + private boolean isLessThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of LessThanEqualTo Expression. + * @return + */ + private boolean isLessThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof LessThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThan Expression. + * @return + */ + private boolean isGreaterThan() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanExpression) { + return true; + } + } + return false; + } + + /** + * Method to find presence of GreaterThanEqual Expression. 
+ * @return + */ + private boolean isGreaterThanEqualTo() { + for (Expression result : this.exp.getChildren()) { + if (result instanceof GreaterThanEqualToExpression) { + return true; + } + } + return false; + } + + /** + * Method to identify if scanning of Data Block required or not by comparing the Block Min and Max + * values and comparing them with filter min and max value. + * @param blockMinValue + * @param blockMaxValue + * @param filterValues + * @return + */ + public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[][] filterValues) { + boolean isScanRequired = true; + + boolean lessThan = isLessThan(); + boolean greaterThan = isGreaterThan(); + boolean greaterThanEqualTo = isGreaterThanEqualTo(); + boolean lessThanEqualTo = isLessThanEqualTo(); + + int filterMinlessThanBlockMin = --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
Free forum by Nabble | Edit this page |