Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197644958 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/CarbonDimension.java --- @@ -51,6 +51,16 @@ */ private int complexTypeOrdinal; + /** + * to store the Ordinal of the Complex Parent Column. + */ + private int parentOrdinal = -1; --- End diff -- Removed --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197645070 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedResultCollector.java --- @@ -67,17 +69,38 @@ int noDictionaryColumnIndex; int complexTypeColumnIndex; + int noDictionaryComplexColumnIndex = 0; --- End diff -- Cannot be moved as it is used at class level --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197645110 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/complextypes/ArrayQueryType.java --- @@ -97,4 +99,56 @@ public void parseBlocksAndReturnComplexColumnByteArray(DimensionRawColumnChunk[] return DataTypeUtil.getDataTypeConverter().wrapWithGenericArrayData(data); } + @Override public Object getDataBasedOnColumn(ByteBuffer dataBuffer, CarbonDimension parent, --- End diff -- Okay.done --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197645133 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/model/QueryModelBuilder.java --- @@ -54,18 +58,134 @@ public QueryModelBuilder projectColumns(String[] projectionColumns) { } else { CarbonMeasure measure = table.getMeasureByName(factTableName, projectionColumnName); if (measure == null) { - throw new RuntimeException(projectionColumnName + - " column not found in the table " + factTableName); + throw new RuntimeException( + projectionColumnName + " column not found in the table " + factTableName); } projection.addMeasure(measure, i); i++; } } - + optimizeProjectionForComplexColumns(projection); this.projection = projection; return this; } + private void optimizeProjectionForComplexColumns(QueryProjection projection) { + // Get the List of Complex Column Projection. + // The optimization techniques which can be applied are + // A. Merging in Driver Side + // B. Merging in the result Collector side. + // Merging is driver side cases are + // Driver merging will eliminate one of the CarbonDimension. + // Executor merging will merge the column output in Result Collector. + // In this routine we are going to do driver merging and leave executor merging. + Map<Integer, List<Integer>> complexColumnMap = new HashMap<>(); + List<ProjectionDimension> carbonDimensions = projection.getDimensions(); + for (ProjectionDimension cols : carbonDimensions) { + // get all the Projections with Parent Ordinal Set. + if (cols.getDimension().getParentOrdinal() != -1) { + if (complexColumnMap.get(cols.getDimension().getParentOrdinal()) != null) { + List<Integer> childColumns = complexColumnMap.get(cols.getDimension().getParentOrdinal()); + childColumns.add(cols.getDimension().getOrdinal()); + complexColumnMap.put(cols.getDimension().getParentOrdinal(), childColumns); + } else { + List<Integer> childColumns = new ArrayList<>(); + childColumns.add(cols.getDimension().getOrdinal()); + complexColumnMap.put(cols.getDimension().getParentOrdinal(), childColumns); + } + } + } + + // Traverse the Map to Find any columns are parent. + for (Map.Entry<Integer, List<Integer>> entry : complexColumnMap.entrySet()) { + List<Integer> childOrdinals = entry.getValue(); + if (childOrdinals.size() > 1) { + // In case of more that one child, have to check if the child columns are in the same path + // and have a common parent. + Collections.sort(childOrdinals); + List<Integer> mergedOrdinals = mergeChildColumns(childOrdinals, entry.getKey()); + if (mergedOrdinals.size() > 0) { + projection = removeDimension(projection, mergedOrdinals); + } + } + } + } + + private QueryProjection removeDimension(QueryProjection projection, + List<Integer> mergedOrdinals) { + List<ProjectionDimension> carbonDimensions = projection.getDimensions(); + QueryProjection outputProjection = new QueryProjection(); + int i = 0; + for (ProjectionDimension cols : carbonDimensions) { + if (!mergedOrdinals.contains(cols.getDimension().getOrdinal())) { + outputProjection.addDimension(cols.getDimension(), i++); + } + } + List<ProjectionMeasure> carbonMeasures = projection.getMeasures(); + for (ProjectionMeasure cols : carbonMeasures) { + outputProjection.addMeasure(cols.getMeasure(), i++); + } + return outputProjection; + } + + private List<Integer> mergeChildColumns(List<Integer> childOrdinals, Integer key) { --- End diff -- removed --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197645136 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java --- @@ -156,8 +162,70 @@ private static boolean isColumnMatches(boolean isTransactionalTable, // If it is non transactional table just check the column names, no need to validate // column id as multiple sdk's output placed in a single folder doesn't have same // column ID but can have same column name - return (tableColumn.getColumnId().equals(queryColumn.getColumnId()) || - (!isTransactionalTable && tableColumn.getColName().equals(queryColumn.getColName()))); + if (tableColumn.getDataType().isComplexType() && !(tableColumn.getDataType().getId() + == DataTypes.ARRAY_TYPE_ID)) { + if (tableColumn.getColumnId().equals(queryColumn.getColumnId())) { + return true; + } else { + return isColumnMatchesStruct(tableColumn, queryColumn); + } + } else { + return (tableColumn.getColumnId().equals(queryColumn.getColumnId()) || (!isTransactionalTable + && tableColumn.getColName().equals(queryColumn.getColName()))); + } + } + + /** + * In case of Multilevel Complex column - STRUCT/STRUCTofSTRUCT, traverse all the child dimension + * to check column Id + * + * @param tableColumn + * @param queryColumn + * @return + */ + private static boolean isColumnMatchesStruct(CarbonColumn tableColumn, CarbonColumn queryColumn) { + if (tableColumn instanceof CarbonDimension) { + CarbonDimension dimension = (CarbonDimension) tableColumn; + if (dimension.getColName().equalsIgnoreCase(queryColumn.getColName().split("\\.")[0])) { + if (dimension.getListOfChildDimensions() != null) { + CarbonColumn columnMatchesStructlevel = + isColumnMatchesStructlevel(tableColumn, dimension.getListOfChildDimensions(), + queryColumn); + if (null != columnMatchesStructlevel) { + return true; + } else { + return false; + } + } + } else { + return (tableColumn.getColumnId().equals(queryColumn.getColumnId())); + } + } + return false; + } + + private static CarbonColumn isColumnMatchesStructlevel(CarbonColumn tableColumn, --- End diff -- changed --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197645152 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/complextypes/StructQueryType.java --- @@ -109,4 +111,53 @@ public StructQueryType(String name, String parentname, int blockIndex) { } return DataTypeUtil.getDataTypeConverter().wrapWithGenericRow(fields); } + + @Override public Object getDataBasedOnColumn(ByteBuffer dataBuffer, CarbonDimension parent, + CarbonDimension child) { + int childLength; + if (parent.getOrdinal() < child.getOrdinal()) { + childLength = parent.getNumberOfChild(); + Object[] fields = new Object[childLength]; + for (int i = 0; i < childLength; i++) { + fields[i] = children.get(i) + .getDataBasedOnColumn(dataBuffer, parent.getListOfChildDimensions().get(i), child); + } + return DataTypeUtil.getDataTypeConverter().wrapWithGenericRow(fields); + } else if (parent.getOrdinal() > child.getOrdinal()) { + return null; + } + else { --- End diff -- done --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197645154 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java --- @@ -146,4 +148,58 @@ public PrimitiveQueryType(String name, String parentname, int blockIndex, return actualData; } + @Override public Object getDataBasedOnColumn(ByteBuffer dataBuffer, CarbonDimension parent, + CarbonDimension child) { + Object actualData = null; + + if (parent.getOrdinal() != child.getOrdinal() || null == dataBuffer) { + return null; + } + + if (isDirectDictionary) { --- End diff -- removed and changed --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197645177 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedResultCollector.java --- @@ -136,28 +189,81 @@ void fillDimensionData(BlockletScannedResult scannedResult, int[] surrogateResul } } else if (complexDataTypeArray[i]) { // Complex Type With No Dictionary Encoding. - row[order[i]] = comlexDimensionInfoMap.get(queryDimensions[i].getDimension().getOrdinal()) - .getDataBasedOnDataType(ByteBuffer.wrap(complexTypeKeyArray[complexTypeColumnIndex++])); + if (queryDimensions[i].getDimension().getParentOrdinal() != -1) { + if (mergedComplexDimensionColumns + .get(queryDimensions[i].getDimension().getParentOrdinal()).size() > 1) { + fillRowForComplexColumn(complexDimensionInfoMap, row, i); + } else { + row[order[i]] = + complexDimensionInfoMap.get(queryDimensions[i].getDimension().getParentOrdinal()) + .getDataBasedOnColumn( + ByteBuffer.wrap(complexTypeKeyArray[complexTypeColumnIndex++]), + queryDimensions[i].getDimension().getComplexParentDimension(), + queryDimensions[i].getDimension()); + } + } else { + row[order[i]] = + complexDimensionInfoMap.get(queryDimensions[i].getDimension().getOrdinal()) + .getDataBasedOnDataType( + ByteBuffer.wrap(complexTypeKeyArray[complexTypeColumnIndex++])); + } } else { - row[order[i]] = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn( - noDictionaryKeys[noDictionaryColumnIndex++], - queryDimensions[i].getDimension().getDataType()); + if (queryDimensions[i].getDimension().getParentOrdinal() != -1) { --- End diff -- done --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197645240 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedResultCollector.java --- @@ -136,28 +189,81 @@ void fillDimensionData(BlockletScannedResult scannedResult, int[] surrogateResul } } else if (complexDataTypeArray[i]) { // Complex Type With No Dictionary Encoding. - row[order[i]] = comlexDimensionInfoMap.get(queryDimensions[i].getDimension().getOrdinal()) - .getDataBasedOnDataType(ByteBuffer.wrap(complexTypeKeyArray[complexTypeColumnIndex++])); + if (queryDimensions[i].getDimension().getParentOrdinal() != -1) { --- End diff -- done --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197645259 --- Diff: core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java --- @@ -417,6 +417,44 @@ public static Object getDataDataTypeForNoDictionaryColumn(String dimensionValue, } } + /** + * Returns true for fixed length DataTypes. + * @param dataType + * @return + */ + public static boolean isFixedSizeDataType(DataType dataType) { + if (dataType == DataTypes.STRING || DataTypes.isDecimal(dataType)) { + return false; + } else { + return true; + } + } + /** + * get the size of fixed size DataType + * @param actualDataType + * @return + */ + public static int getSizeOfDataType(DataType actualDataType) { --- End diff -- changed --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197645280 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala --- @@ -67,14 +68,68 @@ case class CarbonDatasourceHadoopRelation( override def schema: StructType = tableSchema.getOrElse(carbonRelation.schema) def buildScan(requiredColumns: Array[String], + projects: Seq[NamedExpression], filters: Array[Filter], partitions: Seq[PartitionSpec]): RDD[InternalRow] = { val filterExpression: Option[Expression] = filters.flatMap { filter => CarbonFilters.createCarbonFilter(schema, filter) }.reduceOption(new AndExpression(_, _)) + var parentColumns = new ListBuffer[String] + // In case of Struct or StructofStruct Complex type, get the project column for given + // parent/child field and pushdown the corresponding project column. In case of Array, + // ArrayofStruct or StructofArray, pushdown parent column + var reqColumns = projects.map { + case a@Alias(s: GetStructField, name) => + val arrayTypeExists = s.childSchema.map(x => x.dataType) + .filter(dataType => dataType.isInstanceOf[ArrayType]) + if (0 == arrayTypeExists.length) { + val columnName = s.toString().replaceAll("#[0-9]*", "") + parentColumns += columnName.split("\\.")(0) + columnName + } + else { + None + } + case a@Alias(s: GetArrayItem, name) => + None + case other => other.name.replaceAll("#[0-9]*", "") + } + + var reqCols = reqColumns.filterNot(none => none.equals(None)).map(col => col.toString) + parentColumns = parentColumns.distinct + reqCols = reqCols.distinct + + // if the parent column is there in the projection list then we can filter out all the children + // in that projection list + val parentColumnOnProjectionList = reqCols.filter(col => parentColumns.contains(col)) --- End diff -- removed --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197645303 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/complextypes/ArrayQueryType.java --- @@ -97,4 +99,56 @@ public void parseBlocksAndReturnComplexColumnByteArray(DimensionRawColumnChunk[] return DataTypeUtil.getDataTypeConverter().wrapWithGenericArrayData(data); } + @Override public Object getDataBasedOnColumn(ByteBuffer dataBuffer, CarbonDimension parent, + CarbonDimension child) { + int dataLength; + if (parent.getOrdinal() < child.getOrdinal()) { + dataLength = parent.getNumberOfChild(); + + if (dataLength == -1) { + return null; + } + Object[] data = new Object[dataLength]; + for (int i = 0; i < dataLength; i++) { + data[i] = children + .getDataBasedOnColumn(dataBuffer, parent.getListOfChildDimensions().get(i), child); + } + return DataTypeUtil.getDataTypeConverter().wrapWithGenericArrayData(data); + } else if (parent.getOrdinal() > child.getOrdinal()) { + return null; + } else { + // dataLength = dataBuffer.getInt(); + return DataTypeUtil.getDataTypeConverter() + .wrapWithGenericArrayData(getDataBasedOnDataType(dataBuffer)); + } + } + + @Override public Object getDataBasedOnColumnList(Map<CarbonDimension, ByteBuffer> childBuffer, --- End diff -- changed --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2396 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6504/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2396 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6506/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2396 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5338/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2396 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5426/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2396 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5427/ --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on the issue:
https://github.com/apache/carbondata/pull/2396 retest this please --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2396 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6510/ --- |
In reply to this post by qiuchenjian-2
Github user Indhumathi27 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2396#discussion_r197678273 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedResultCollector.java --- @@ -19,6 +19,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; --- End diff -- VectorCollector is not required for now. It will be handled in next Complex data type enhancement PR --- |
Free forum by Nabble | Edit this page |