Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125190897 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java --- @@ -209,9 +233,29 @@ private static FilterExecuter getIncludeFilterExecuter( * @return */ private static FilterExecuter getExcludeFilterExecuter( - DimColumnResolvedFilterInfo dimColResolvedFilterInfo, SegmentProperties segmentProperties) { + DimColumnResolvedFilterInfo dimColResolvedFilterInfo, + MeasureColumnResolvedFilterInfo msrColResolvedFilterInfo, + SegmentProperties segmentProperties) { - if (dimColResolvedFilterInfo.getDimension().isColumnar()) { + if (null != msrColResolvedFilterInfo && msrColResolvedFilterInfo.getMeasure().isColumnar()) { --- End diff -- Done. Removed
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125190940 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java --- @@ -395,6 +440,58 @@ public static DimColumnFilterInfo getNoDictionaryValKeyMemberForFilter( } /** + * This method will get the no dictionary data based on filters and same + * will be in ColumnFilterInfo + * + * @param evaluateResultListFinal + * @param isIncludeFilter + * @return ColumnFilterInfo + */ + public static ColumnFilterInfo getMeasureValKeyMemberForFilter( + List<String> evaluateResultListFinal, boolean isIncludeFilter, DataType dataType, + CarbonMeasure carbonMeasure) throws FilterUnsupportedException { + List<byte[]> filterValuesList = new ArrayList<byte[]>(20); + String result = null; + try { + int length = evaluateResultListFinal.size(); + for (int i = 0; i < length; i++) { + result = evaluateResultListFinal.get(i); + if (CarbonCommonConstants.MEMBER_DEFAULT_VAL.equals(result)) { + filterValuesList.add(new byte[0]); + continue; + } + // TODO have to understand what method to be used for measures. + // filterValuesList + // .add(DataTypeUtil.getBytesBasedOnDataTypeForNoDictionaryColumn(result, dataType)); --- End diff -- Done. Removed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191345 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java --- @@ -395,6 +440,58 @@ public static DimColumnFilterInfo getNoDictionaryValKeyMemberForFilter( } /** + * This method will get the no dictionary data based on filters and same + * will be in ColumnFilterInfo + * + * @param evaluateResultListFinal + * @param isIncludeFilter + * @return ColumnFilterInfo + */ + public static ColumnFilterInfo getMeasureValKeyMemberForFilter( + List<String> evaluateResultListFinal, boolean isIncludeFilter, DataType dataType, + CarbonMeasure carbonMeasure) throws FilterUnsupportedException { + List<byte[]> filterValuesList = new ArrayList<byte[]>(20); + String result = null; + try { + int length = evaluateResultListFinal.size(); + for (int i = 0; i < length; i++) { + result = evaluateResultListFinal.get(i); + if (CarbonCommonConstants.MEMBER_DEFAULT_VAL.equals(result)) { + filterValuesList.add(new byte[0]); + continue; + } + // TODO have to understand what method to be used for measures. + // filterValuesList + // .add(DataTypeUtil.getBytesBasedOnDataTypeForNoDictionaryColumn(result, dataType)); + + filterValuesList + .add(DataTypeUtil.getMeasureByteArrayBasedOnDataTypes(result, dataType, carbonMeasure)); + + } + } catch (Throwable ex) { + throw new FilterUnsupportedException("Unsupported Filter condition: " + result, ex); + } + + Comparator<byte[]> filterMeasureComaparator = new Comparator<byte[]>() { + + @Override public int compare(byte[] filterMember1, byte[] filterMember2) { + // TODO Auto-generated method stub + return ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterMember1, filterMember2); --- End diff -- We convert the String values into byte arrays and save them into filterValuesList. populateFilterResolvedInfo itself converts every data type to a String before passing it along; we may have to rectify that and pass the actual data type on from populateFilterResolvedInfo. However, all filter-value comparisons for measures are currently sequential (in Include, RowLevelLessThan, RowLevelLessThanEqual, RowLevelGrtThanEqual and RowLevelGrtThan), so there is no chance of getting a wrong result. Also, Range is not implemented yet for measures, which is where the two filter values would need to be in ascending order. In the next optimization stage we can hold the filter values in their actual data type and have a comparator for each type. That way the comparison will be proper and we will not have to convert a data type to bytes and back to an object while doing the actual comparison; we can carry the object all along.
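To illustrate the follow-up idea above — holding measure filter values in their actual data type and picking one comparator per type — here is a minimal, self-contained Java sketch. It is not code from this PR: the MeasureType enum and the comparatorFor method are made-up stand-ins for the real DataType handling.

import java.math.BigDecimal;
import java.util.Comparator;

public class TypedMeasureComparators {

  // Hypothetical stand-in for the subset of data types handled for measures.
  enum MeasureType { SHORT, INT, LONG, DECIMAL, DOUBLE }

  // Pick a comparator that works on the decoded object, so no byte[] round trip
  // is needed when comparing filter values with column values.
  static Comparator<Object> comparatorFor(MeasureType type) {
    switch (type) {
      case SHORT:
      case INT:
      case LONG:
        return (a, b) -> Long.compare(((Number) a).longValue(), ((Number) b).longValue());
      case DECIMAL:
        return (a, b) -> ((BigDecimal) a).compareTo((BigDecimal) b);
      default: // DOUBLE
        return (a, b) -> Double.compare(((Number) a).doubleValue(), ((Number) b).doubleValue());
    }
  }

  public static void main(String[] args) {
    System.out.println(comparatorFor(MeasureType.LONG).compare(42L, 7L) > 0);         // true
    System.out.println(comparatorFor(MeasureType.DECIMAL)
        .compare(new BigDecimal("1.50"), new BigDecimal("1.5")) == 0);                 // true: compareTo ignores scale
  }
}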
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191362 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/ConditionalFilterResolverImpl.java --- @@ -198,21 +237,31 @@ public AbsoluteTableIdentifier getTableIdentifier() { */ public void getStartKey(SegmentProperties segmentProperties, long[] startKey, SortedMap<Integer, byte[]> setOfStartKeyByteArray, List<long[]> startKeyList) { - FilterUtil.getStartKey(dimColResolvedFilterInfo.getDimensionResolvedFilterInstance(), - segmentProperties, startKey, startKeyList); - FilterUtil.getStartKeyForNoDictionaryDimension(dimColResolvedFilterInfo, - segmentProperties, setOfStartKeyByteArray); + if (null != dimColResolvedFilterInfo) { + FilterUtil.getStartKey(dimColResolvedFilterInfo.getDimensionResolvedFilterInstance(), + segmentProperties, startKey, startKeyList); + FilterUtil.getStartKeyForNoDictionaryDimension(dimColResolvedFilterInfo, segmentProperties, + setOfStartKeyByteArray); + } +// else { +// FilterUtil.getStartKey(dimColResolvedFilterInfo.getDimensionResolvedFilterInstance(), +// segmentProperties, startKey, startKeyList); +// FilterUtil.getStartKeyForNoDictionaryDimension(dimColResolvedFilterInfo, segmentProperties, +// setOfStartKeyByteArray); +// } --- End diff -- Done. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191383 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/ConditionalFilterResolverImpl.java --- @@ -44,16 +44,22 @@ protected boolean isExpressionResolve; protected boolean isIncludeFilter; private DimColumnResolvedFilterInfo dimColResolvedFilterInfo; + private MeasureColumnResolvedFilterInfo msrColResolvedFilterInfo; private AbsoluteTableIdentifier tableIdentifier; + private boolean isMeasure; --- End diff -- Done. Removed
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191402 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/partition/PartitionFilterUtil.java --- @@ -107,6 +131,12 @@ public static Comparator getComparatorByDataType(DataType dataType) { } } + static class DecimalComparator implements Comparator<Object> { --- End diff -- Done. Removed
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191432 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/partition/PartitionFilterUtil.java --- @@ -76,24 +99,25 @@ public static Comparator getComparatorByDataType(DataType dataType) { static class DoubleComparator implements Comparator<Object> { @Override public int compare(Object key1, Object key2) { - double result = (double) key1 - (double) key2; - if (result < 0) { + double key1Double1 = (double)key1; --- End diff -- There is a scenario where, if one of the values is negative, key1 - key2 will not give the proper output. It is better to compare with the greater-than and less-than operators.
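For illustration, a small self-contained Java example (not the PR code) of why deriving the ordering from the sign of key1 - key2 is fragile and why explicit less-than/greater-than checks, as in the fixed DoubleComparator, are safer. The overflow shown uses integral keys, where one large negative operand makes the difference wrap around; explicit comparisons avoid relying on the sign of a computed difference.

import java.util.Comparator;

public class ComparatorPitfallDemo {

  // Explicit comparison, in the spirit of the fixed DoubleComparator.
  static final Comparator<Object> DOUBLE_COMPARATOR = (key1, key2) -> {
    double k1 = (double) key1;
    double k2 = (double) key2;
    if (k1 < k2) {
      return -1;
    } else if (k1 > k2) {
      return 1;
    }
    return 0;
  };

  public static void main(String[] args) {
    // With integral keys, subtraction overflows when one operand is a large
    // negative number, so the sign of the difference is wrong.
    long key1 = Long.MIN_VALUE;
    long key2 = 1L;
    System.out.println((key1 - key2) < 0);              // false -> wrong ordering
    System.out.println(Long.compare(key1, key2) < 0);   // true  -> correct

    // The explicit double comparator orders negative values correctly.
    System.out.println(DOUBLE_COMPARATOR.compare(-2.5d, 1.0d));  // -1
  }
}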
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191588 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/MeasureColumnExecuterFilterInfo.java --- @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.scan.filter.executer; + +public class MeasureColumnExecuterFilterInfo { + + byte[][] filterKeys; --- End diff -- In the current implementation the filter keys are kept as a byte array to keep things simple and in sync with the dimension filter keys. During the actual comparison the filter keys are converted back to objects and compared. In the next optimization phase we will change the filter keys to hold objects.
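To make the byte-array round trip concrete, here is a hedged sketch using only java.nio (not the CarbonData utilities): a long measure filter value is encoded to an 8-byte key when the keys are prepared, and decoded back to an object at comparison time. The encode/decode method names are illustrative.

import java.nio.ByteBuffer;

public class MeasureKeyRoundTrip {

  // Encode a long filter value into an 8-byte key (big-endian), similar in
  // spirit to the SHORT/INT/LONG branch of getMeasureByteArrayBasedOnDataTypes.
  static byte[] encode(long value) {
    ByteBuffer b = ByteBuffer.allocate(8);
    b.putLong(value);
    b.flip();
    return b.array();
  }

  // Decode the key back to an object before comparing, similar in spirit to
  // getMeasureObjectFromDataType.
  static Object decode(byte[] key) {
    return ByteBuffer.wrap(key).getLong();
  }

  public static void main(String[] args) {
    byte[] key = encode(12345L);
    System.out.println(decode(key));   // 12345
  }
}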
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191629 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java --- @@ -74,80 +87,205 @@ private void ifDefaultValueMatchesFilter() { } } } + } else if (!msrColEvalutorInfoList.isEmpty() && !isMeasurePresentInCurrentBlock[0]) { + CarbonMeasure measure = this.msrColEvalutorInfoList.get(0).getMeasure(); + byte[] defaultValue = measure.getDefaultValue(); + if (null != defaultValue) { + for (int k = 0; k < filterRangeValues.length; k++) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangeValues[k], defaultValue); --- End diff -- Currently the filter keys are byte arrays, and the default values used for restructuring are in the same format. Is this change still required?
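For readers following the restructure path, a simplified sketch of what this loop decides, using plain long values instead of the encoded byte arrays (the method name and the greater-than reading are my own illustration, not the PR code): a block that does not physically contain the measure, so every row effectively carries the default value, can still satisfy a greater-than filter when some filter value is smaller than that default.

public class DefaultValueCheckSketch {

  // The block matches a "measure > filterValue" predicate through its default
  // value as soon as some filter value is smaller than that default.
  static boolean defaultValueSatisfiesGreaterThan(long[] filterValues, long defaultValue) {
    for (long filterValue : filterValues) {
      if (filterValue < defaultValue) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) {
    System.out.println(defaultValueSatisfiesGreaterThan(new long[] {10, 20}, 15));  // true: default 15 > filter 10
    System.out.println(defaultValueSatisfiesGreaterThan(new long[] {30, 40}, 15));  // false
  }
}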
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191734 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java --- @@ -74,80 +87,205 @@ private void ifDefaultValueMatchesFilter() { } } } + } else if (!msrColEvalutorInfoList.isEmpty() && !isMeasurePresentInCurrentBlock[0]) { + CarbonMeasure measure = this.msrColEvalutorInfoList.get(0).getMeasure(); + byte[] defaultValue = measure.getDefaultValue(); + if (null != defaultValue) { + for (int k = 0; k < filterRangeValues.length; k++) { + int maxCompare = + ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterRangeValues[k], defaultValue); + if (maxCompare < 0) { + isDefaultValuePresentInFilter = true; + break; + } + } + } } } @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { BitSet bitSet = new BitSet(1); - boolean isScanRequired = - isScanRequired(blockMaxValue[dimensionBlocksIndex[0]], filterRangeValues); + boolean isScanRequired = false; + byte[] maxValue = null; + if (isMeasurePresentInCurrentBlock[0] || isDimensionPresentInCurrentBlock[0]) { + if (isMeasurePresentInCurrentBlock[0]) { + maxValue = blockMaxValue[measureBlocksIndex[0] + lastDimensionColOrdinal]; + isScanRequired = + isScanRequired(maxValue, filterRangeValues, msrColEvalutorInfoList.get(0).getType()); + } else { + maxValue = blockMaxValue[dimensionBlocksIndex[0]]; + isScanRequired = isScanRequired(maxValue, filterRangeValues); + } + } else { + isScanRequired = isDefaultValuePresentInFilter; + } + if (isScanRequired) { bitSet.set(0); } return bitSet; } + private boolean isScanRequired(byte[] blockMaxValue, byte[][] filterValues) { boolean isScanRequired = false; - if (isDimensionPresentInCurrentBlock[0]) { - for (int k = 0; k < filterValues.length; k++) { - // filter value should be in range of max and min value i.e - // max>filtervalue>min - // so filter-max should be negative - int maxCompare = ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[k], blockMaxValue); - // if any filter value is in range than this block needs to be - // scanned means always less than block max range. - if (maxCompare < 0) { - isScanRequired = true; - break; - } + for (int k = 0; k < filterValues.length; k++) { + // filter value should be in range of max and min value i.e + // max>filtervalue>min + // so filter-max should be negative + int maxCompare = ByteUtil.UnsafeComparer.INSTANCE.compareTo(filterValues[k], blockMaxValue); + // if any filter value is in range than this block needs to be + // scanned less than equal to max range. + if (maxCompare < 0) { + isScanRequired = true; + break; } - } else { - isScanRequired = isDefaultValuePresentInFilter; } return isScanRequired; } + private boolean isScanRequired(byte[] maxValue, byte[][] filterValue, + DataType dataType) { + for (int i = 0; i < filterValue.length; i++) { + if (filterValue[i].length == 0 || maxValue.length == 0) { + return isScanRequired(maxValue, filterValue); + } + switch (dataType) { --- End diff -- In Line 150 is a special Null Value case, rest of the cases comparator is being used. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
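A plain-value sketch (not the PR code) of the pruning rule this method applies for a greater-than filter: a block needs to be scanned only if at least one filter value lies strictly below the block's maximum. The real method additionally special-cases empty byte arrays — the null representation mentioned above — and dispatches on the data type before comparing.

public class GreaterThanPruningSketch {

  // Scan the block only if some filter value is strictly below the block max,
  // i.e. the block can contain rows greater than that filter value.
  static boolean isScanRequired(long blockMax, long[] filterValues) {
    for (long filterValue : filterValues) {
      if (filterValue < blockMax) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) {
    System.out.println(isScanRequired(100, new long[] {50}));   // true: rows in (50, 100] may exist
    System.out.println(isScanRequired(100, new long[] {100}));  // false: no row can exceed 100
  }
}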
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191770 --- Diff: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java --- @@ -113,6 +115,143 @@ public static Object getMeasureValueBasedOnDataType(String msrValue, DataType da } } + public static Object getMeasureObjectFromDataType(byte[] data, DataType dataType) { + ByteBuffer bb = ByteBuffer.wrap(data); + switch (dataType) { + case SHORT: + case INT: + case LONG: + return bb.getLong(); + case DECIMAL: + return byteToBigDecimal(data); + default: + return bb.getDouble(); + } + } + + /** + * This method will convert a given ByteArray to its specific type + * + * @param msrValue + * @param dataType + * @param carbonMeasure + * @return + */ + // public static byte[] getMeasureByteArrayBasedOnDataType(String msrValue, DataType dataType, + // CarbonMeasure carbonMeasure) { + // switch (dataType) { + // case DECIMAL: + // BigDecimal bigDecimal = + // new BigDecimal(msrValue).setScale(carbonMeasure.getScale(), RoundingMode.HALF_UP); + // return ByteUtil.toBytes(normalizeDecimalValue(bigDecimal, carbonMeasure.getPrecision())); + // case SHORT: + // return ByteUtil.toBytes((Short.parseShort(msrValue))); + // case INT: + // return ByteUtil.toBytes(Integer.parseInt(msrValue)); + // case LONG: + // return ByteUtil.toBytes(Long.valueOf(msrValue)); + // default: + // Double parsedValue = Double.valueOf(msrValue); + // if (Double.isInfinite(parsedValue) || Double.isNaN(parsedValue)) { + // return null; + // } + // return ByteUtil.toBytes(parsedValue); + // } + // } --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191772 --- Diff: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java --- @@ -113,6 +115,143 @@ public static Object getMeasureValueBasedOnDataType(String msrValue, DataType da } } + public static Object getMeasureObjectFromDataType(byte[] data, DataType dataType) { + ByteBuffer bb = ByteBuffer.wrap(data); + switch (dataType) { + case SHORT: + case INT: + case LONG: + return bb.getLong(); + case DECIMAL: + return byteToBigDecimal(data); + default: + return bb.getDouble(); + } + } + + /** + * This method will convert a given ByteArray to its specific type + * + * @param msrValue + * @param dataType + * @param carbonMeasure + * @return + */ + // public static byte[] getMeasureByteArrayBasedOnDataType(String msrValue, DataType dataType, + // CarbonMeasure carbonMeasure) { + // switch (dataType) { + // case DECIMAL: + // BigDecimal bigDecimal = + // new BigDecimal(msrValue).setScale(carbonMeasure.getScale(), RoundingMode.HALF_UP); + // return ByteUtil.toBytes(normalizeDecimalValue(bigDecimal, carbonMeasure.getPrecision())); + // case SHORT: + // return ByteUtil.toBytes((Short.parseShort(msrValue))); + // case INT: + // return ByteUtil.toBytes(Integer.parseInt(msrValue)); + // case LONG: + // return ByteUtil.toBytes(Long.valueOf(msrValue)); + // default: + // Double parsedValue = Double.valueOf(msrValue); + // if (Double.isInfinite(parsedValue) || Double.isNaN(parsedValue)) { + // return null; + // } + // return ByteUtil.toBytes(parsedValue); + // } + // } + public static byte[] getMeasureByteArrayBasedOnDataTypes(String msrValue, DataType dataType, + CarbonMeasure carbonMeasure) { + ByteBuffer b; + switch (dataType) { + case BYTE: + case SHORT: + case INT: + case LONG: + b = ByteBuffer.allocate(8); + b.putLong(Long.valueOf(msrValue)); + b.flip(); + return b.array(); + case DOUBLE: + b = ByteBuffer.allocate(8); + b.putDouble(Double.valueOf(msrValue)); + b.flip(); + return b.array(); + case DECIMAL: + BigDecimal bigDecimal = + new BigDecimal(msrValue).setScale(carbonMeasure.getScale(), RoundingMode.HALF_UP); + return DataTypeUtil + .bigDecimalToByte(normalizeDecimalValue(bigDecimal, carbonMeasure.getPrecision())); + default: + throw new IllegalArgumentException("Invalid data type: " + dataType); + } + } + + /** + * This method will convert a given ByteArray to its specific type + * + * @param msrValue + * @param dataType + * @param carbonMeasure + * @return + */ + public static byte[] getMeasureByteArrayBasedOnDataType(ColumnPage measurePage, int index, --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191774 --- Diff: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java --- @@ -113,6 +115,143 @@ public static Object getMeasureValueBasedOnDataType(String msrValue, DataType da } } + public static Object getMeasureObjectFromDataType(byte[] data, DataType dataType) { + ByteBuffer bb = ByteBuffer.wrap(data); + switch (dataType) { + case SHORT: + case INT: + case LONG: + return bb.getLong(); + case DECIMAL: + return byteToBigDecimal(data); + default: + return bb.getDouble(); + } + } + + /** + * This method will convert a given ByteArray to its specific type + * + * @param msrValue + * @param dataType + * @param carbonMeasure + * @return + */ + // public static byte[] getMeasureByteArrayBasedOnDataType(String msrValue, DataType dataType, + // CarbonMeasure carbonMeasure) { + // switch (dataType) { + // case DECIMAL: + // BigDecimal bigDecimal = + // new BigDecimal(msrValue).setScale(carbonMeasure.getScale(), RoundingMode.HALF_UP); + // return ByteUtil.toBytes(normalizeDecimalValue(bigDecimal, carbonMeasure.getPrecision())); + // case SHORT: + // return ByteUtil.toBytes((Short.parseShort(msrValue))); + // case INT: + // return ByteUtil.toBytes(Integer.parseInt(msrValue)); + // case LONG: + // return ByteUtil.toBytes(Long.valueOf(msrValue)); + // default: + // Double parsedValue = Double.valueOf(msrValue); + // if (Double.isInfinite(parsedValue) || Double.isNaN(parsedValue)) { + // return null; + // } + // return ByteUtil.toBytes(parsedValue); + // } + // } + public static byte[] getMeasureByteArrayBasedOnDataTypes(String msrValue, DataType dataType, + CarbonMeasure carbonMeasure) { + ByteBuffer b; + switch (dataType) { + case BYTE: + case SHORT: + case INT: + case LONG: + b = ByteBuffer.allocate(8); + b.putLong(Long.valueOf(msrValue)); + b.flip(); + return b.array(); + case DOUBLE: + b = ByteBuffer.allocate(8); + b.putDouble(Double.valueOf(msrValue)); + b.flip(); + return b.array(); + case DECIMAL: + BigDecimal bigDecimal = + new BigDecimal(msrValue).setScale(carbonMeasure.getScale(), RoundingMode.HALF_UP); + return DataTypeUtil + .bigDecimalToByte(normalizeDecimalValue(bigDecimal, carbonMeasure.getPrecision())); + default: + throw new IllegalArgumentException("Invalid data type: " + dataType); + } + } + + /** + * This method will convert a given ByteArray to its specific type + * + * @param msrValue + * @param dataType + * @param carbonMeasure + * @return + */ + public static byte[] getMeasureByteArrayBasedOnDataType(ColumnPage measurePage, int index, + DataType dataType, CarbonMeasure carbonMeasure) { + switch (dataType) { + case DECIMAL: + BigDecimal bigDecimal = new BigDecimal(measurePage.getDouble(index)) + .setScale(carbonMeasure.getScale(), RoundingMode.HALF_UP); + return ByteUtil.toBytes(normalizeDecimalValue(bigDecimal, carbonMeasure.getPrecision())); + case SHORT: + return ByteUtil.toBytes(measurePage.getShort(index)); + case INT: + return ByteUtil.toBytes(measurePage.getInt(index)); + case LONG: + return ByteUtil.toBytes(measurePage.getLong(index)); + default: + Double parsedValue = Double.valueOf(measurePage.getDouble(index)); + if (Double.isInfinite(parsedValue) || Double.isNaN(parsedValue)) { + return null; + } + return ByteUtil.toBytes(parsedValue); + } + } + + public static Object getMeasureObjectBasedOnDataType(ColumnPage measurePage, int index, + DataType dataType, CarbonMeasure carbonMeasure) { + // switch (dataType) { + // case DECIMAL: + // BigDecimal bigDecimal = new 
BigDecimal(measurePage.getDouble(index)) + // .setScale(carbonMeasure.getScale(), RoundingMode.HALF_UP); + // return normalizeDecimalValue(bigDecimal, carbonMeasure.getPrecision()); + // case SHORT: + // case INT: + // case LONG: + // return measurePage.getLong(index); + // default: + // Double parsedValue = Double.valueOf(measurePage.getDouble(index)); + // if (Double.isInfinite(parsedValue) || Double.isNaN(parsedValue)) { + // return null; + // } + // return parsedValue; + // } --- End diff -- Done. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191810 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java --- @@ -395,6 +440,58 @@ public static DimColumnFilterInfo getNoDictionaryValKeyMemberForFilter( } /** + * This method will get the no dictionary data based on filters and same + * will be in ColumnFilterInfo + * + * @param evaluateResultListFinal + * @param isIncludeFilter + * @return ColumnFilterInfo + */ + public static ColumnFilterInfo getMeasureValKeyMemberForFilter( + List<String> evaluateResultListFinal, boolean isIncludeFilter, DataType dataType, + CarbonMeasure carbonMeasure) throws FilterUnsupportedException { + List<byte[]> filterValuesList = new ArrayList<byte[]>(20); + String result = null; + try { + int length = evaluateResultListFinal.size(); + for (int i = 0; i < length; i++) { + result = evaluateResultListFinal.get(i); + if (CarbonCommonConstants.MEMBER_DEFAULT_VAL.equals(result)) { + filterValuesList.add(new byte[0]); + continue; + } + // TODO have to understand what method to be used for measures. + // filterValuesList + // .add(DataTypeUtil.getBytesBasedOnDataTypeForNoDictionaryColumn(result, dataType)); + + filterValuesList + .add(DataTypeUtil.getMeasureByteArrayBasedOnDataTypes(result, dataType, carbonMeasure)); --- End diff -- Currently we are storing filter keys of measures in byte array format. In next optimization phase will change to Object array of respective type to avoid conversion. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125191929 --- Diff: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java --- @@ -113,6 +115,143 @@ public static Object getMeasureValueBasedOnDataType(String msrValue, DataType da } } + public static Object getMeasureObjectFromDataType(byte[] data, DataType dataType) { + ByteBuffer bb = ByteBuffer.wrap(data); --- End diff -- Done
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125198825 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java --- @@ -1042,12 +1144,17 @@ public static FilterExecuter getFilterExecuterTree( * @param dimension * @param dimColumnExecuterInfo */ - public static void prepareKeysFromSurrogates(DimColumnFilterInfo filterValues, + public static void prepareKeysFromSurrogates(ColumnFilterInfo filterValues, SegmentProperties segmentProperties, CarbonDimension dimension, - DimColumnExecuterFilterInfo dimColumnExecuterInfo) { - byte[][] keysBasedOnFilter = getKeyArray(filterValues, dimension, segmentProperties); - dimColumnExecuterInfo.setFilterKeys(keysBasedOnFilter); - + DimColumnExecuterFilterInfo dimColumnExecuterInfo, CarbonMeasure measures, + MeasureColumnExecuterFilterInfo msrColumnExecuterInfo) { + if (null != measures) { --- End diff -- This if check is required in order to set the filter keys on the respective measure or dimension executor info.
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125198925 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java --- @@ -18,56 +18,152 @@ import java.io.IOException; import java.util.BitSet; +import java.util.Comparator; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; +import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.partition.PartitionFilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; public class ExcludeFilterExecuterImpl implements FilterExecuter { protected DimColumnResolvedFilterInfo dimColEvaluatorInfo; protected DimColumnExecuterFilterInfo dimColumnExecuterInfo; + protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo; + protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo; protected SegmentProperties segmentProperties; + protected boolean isDimensionPresentInCurrentBlock = false; + protected boolean isMeasurePresentInCurrentBlock = false; /** * is dimension column data is natural sorted */ - private boolean isNaturalSorted; + private boolean isNaturalSorted = false; + public ExcludeFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, - SegmentProperties segmentProperties) { - this.dimColEvaluatorInfo = dimColEvaluatorInfo; - dimColumnExecuterInfo = new DimColumnExecuterFilterInfo(); + MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo, SegmentProperties segmentProperties, + boolean isMeasure) { this.segmentProperties = segmentProperties; - FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(), segmentProperties, - dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo); - isNaturalSorted = dimColEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColEvaluatorInfo - .getDimension().isSortColumn(); + if (isMeasure == false) { --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125199168 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java --- @@ -18,56 +18,152 @@ import java.io.IOException; import java.util.BitSet; +import java.util.Comparator; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; +import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.partition.PartitionFilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; public class ExcludeFilterExecuterImpl implements FilterExecuter { protected DimColumnResolvedFilterInfo dimColEvaluatorInfo; protected DimColumnExecuterFilterInfo dimColumnExecuterInfo; + protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo; + protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo; protected SegmentProperties segmentProperties; + protected boolean isDimensionPresentInCurrentBlock = false; + protected boolean isMeasurePresentInCurrentBlock = false; --- End diff -- Done, removed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125199254 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java --- @@ -18,56 +18,152 @@ import java.io.IOException; import java.util.BitSet; +import java.util.Comparator; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; +import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.partition.PartitionFilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; public class ExcludeFilterExecuterImpl implements FilterExecuter { protected DimColumnResolvedFilterInfo dimColEvaluatorInfo; protected DimColumnExecuterFilterInfo dimColumnExecuterInfo; + protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo; + protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo; protected SegmentProperties segmentProperties; + protected boolean isDimensionPresentInCurrentBlock = false; + protected boolean isMeasurePresentInCurrentBlock = false; /** * is dimension column data is natural sorted */ - private boolean isNaturalSorted; + private boolean isNaturalSorted = false; + public ExcludeFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, - SegmentProperties segmentProperties) { - this.dimColEvaluatorInfo = dimColEvaluatorInfo; - dimColumnExecuterInfo = new DimColumnExecuterFilterInfo(); + MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo, SegmentProperties segmentProperties, + boolean isMeasure) { this.segmentProperties = segmentProperties; - FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(), segmentProperties, - dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo); - isNaturalSorted = dimColEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColEvaluatorInfo - .getDimension().isSortColumn(); + if (isMeasure == false) { + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + dimColumnExecuterInfo = new DimColumnExecuterFilterInfo(); + + FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(), segmentProperties, + dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo, null, null); + isDimensionPresentInCurrentBlock = true; + isNaturalSorted = + dimColEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColEvaluatorInfo + .getDimension().isSortColumn(); + } else { + this.msrColumnEvaluatorInfo = msrColumnEvaluatorInfo; + msrColumnExecutorInfo = new MeasureColumnExecuterFilterInfo(); + FilterUtil + .prepareKeysFromSurrogates(msrColumnEvaluatorInfo.getFilterValues(), segmentProperties, + null, null, msrColumnEvaluatorInfo.getMeasure(), msrColumnExecutorInfo); + isMeasurePresentInCurrentBlock = true; + } + } @Override public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) throws IOException { - int 
blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() - .get(dimColEvaluatorInfo.getColumnIndex()); - if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { - blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() - .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); + if (isDimensionPresentInCurrentBlock == true) { + int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimColEvaluatorInfo.getColumnIndex()); + if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { + blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() + .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); + } + DimensionRawColumnChunk dimensionRawColumnChunk = + blockChunkHolder.getDimensionRawDataChunk()[blockIndex]; + DimensionColumnDataChunk[] dimensionColumnDataChunks = + dimensionRawColumnChunk.convertToDimColDataChunks(); + BitSetGroup bitSetGroup = new BitSetGroup(dimensionRawColumnChunk.getPagesCount()); + for (int i = 0; i < dimensionColumnDataChunks.length; i++) { + BitSet bitSet = getFilteredIndexes(dimensionColumnDataChunks[i], + dimensionRawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } + + return bitSetGroup; + } else if (isMeasurePresentInCurrentBlock == true) { + int blockIndex = segmentProperties.getMeasuresOrdinalToBlockMapping() + .get(msrColumnEvaluatorInfo.getColumnIndex()); + if (null == blockChunkHolder.getMeasureRawDataChunk()[blockIndex]) { + blockChunkHolder.getMeasureRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() + .getMeasureChunk(blockChunkHolder.getFileReader(), blockIndex); + } + MeasureRawColumnChunk measureRawColumnChunk = + blockChunkHolder.getMeasureRawDataChunk()[blockIndex]; + MeasureColumnDataChunk[] measureColumnDataChunks = + measureRawColumnChunk.convertToMeasureColDataChunks(); + BitSetGroup bitSetGroup = new BitSetGroup(measureRawColumnChunk.getPagesCount()); + DataType msrType = getMeasureDataType(msrColumnEvaluatorInfo); + for (int i = 0; i < measureColumnDataChunks.length; i++) { + BitSet bitSet = + getFilteredIndexes(measureColumnDataChunks[i], measureRawColumnChunk.getRowCount()[i], + msrType); + bitSetGroup.setBitSet(bitSet, i); + } + return bitSetGroup; } - DimensionRawColumnChunk dimensionRawColumnChunk = - blockChunkHolder.getDimensionRawDataChunk()[blockIndex]; - DimensionColumnDataChunk[] dimensionColumnDataChunks = - dimensionRawColumnChunk.convertToDimColDataChunks(); - BitSetGroup bitSetGroup = - new BitSetGroup(dimensionRawColumnChunk.getPagesCount()); - for (int i = 0; i < dimensionColumnDataChunks.length; i++) { - BitSet bitSet = getFilteredIndexes(dimensionColumnDataChunks[i], - dimensionRawColumnChunk.getRowCount()[i]); - bitSetGroup.setBitSet(bitSet, i); + return null; + } + + private DataType getMeasureDataType(MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo) { + switch (msrColumnEvaluatorInfo.getType()) { + case SHORT: + return DataType.SHORT; + case INT: + return DataType.INT; + case LONG: + return DataType.LONG; + case DECIMAL: + return DataType.DECIMAL; + default: + return DataType.DOUBLE; } + } - return bitSetGroup; + protected BitSet getFilteredIndexes(MeasureColumnDataChunk measureColumnDataChunk, + int numerOfRows, DataType msrType) { + // Here the algorithm is + // Get the measure values from the chunk. compare sequentially with the + // the filter values. The one that matches sets it Bitset. 
+ BitSet bitSet = new BitSet(numerOfRows); + bitSet.flip(0, numerOfRows); + byte[][] filterValues = msrColumnExecutorInfo.getFilterKeys(); --- End diff -- As of now filterKeys uses a byte array; in a later optimization we will convert it to store objects.
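A simplified sketch of the exclude path this part of the diff builds up (assumptions: plain longs stand in for the decoded measure values and filter keys, and null handling plus the per-page structure are omitted): every row starts out selected, and rows whose value equals one of the filter values are cleared from the BitSet.

import java.util.BitSet;

public class ExcludeFilterSketch {

  // Start with every row selected, then drop rows whose value matches any
  // filter value (exclude semantics).
  static BitSet excludeFilteredIndexes(long[] rowValues, long[] filterValues) {
    BitSet bitSet = new BitSet(rowValues.length);
    bitSet.flip(0, rowValues.length);            // all rows pass by default
    for (long filterValue : filterValues) {
      for (int row = 0; row < rowValues.length; row++) {
        if (rowValues[row] == filterValue) {
          bitSet.clear(row);                     // matching rows are excluded
        }
      }
    }
    return bitSet;
  }

  public static void main(String[] args) {
    long[] rows = {5, 7, 7, 9};
    System.out.println(excludeFilteredIndexes(rows, new long[] {7}));  // {0, 3}
  }
}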
Github user sounakr commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125199276 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java --- @@ -17,65 +17,174 @@ package org.apache.carbondata.core.scan.filter.executer; import java.io.IOException; +import java.math.BigDecimal; +import java.nio.ByteBuffer; import java.util.BitSet; +import java.util.Comparator; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; +import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.partition.PartitionFilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; import org.apache.carbondata.core.util.ByteUtil; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; public class IncludeFilterExecuterImpl implements FilterExecuter { protected DimColumnResolvedFilterInfo dimColumnEvaluatorInfo; protected DimColumnExecuterFilterInfo dimColumnExecuterInfo; + protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo; + protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo; protected SegmentProperties segmentProperties; + protected boolean isDimensionPresentInCurrentBlock = false; + protected boolean isMeasurePresentInCurrentBlock = false; /** * is dimension column data is natural sorted */ - private boolean isNaturalSorted; + private boolean isNaturalSorted = false; --- End diff -- Done. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at [hidden email] or file a JIRA ticket with INFRA. --- |