Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1672

SDV Build Success, Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/2400/
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1672

Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/2119/
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1672

Build Success with Spark 2.2.0, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/894/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1672

SDV Build Success, Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/2404/
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1672#discussion_r157702204

--- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala ---
@@ -146,27 +184,13 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
     }
   }

-  protected def pruneFilterProject(
-      relation: LogicalRelation,
-      projects: Seq[NamedExpression],
-      filterPredicates: Seq[Expression],
-      scanBuilder: (Seq[Attribute], Array[Filter],
-        ArrayBuffer[AttributeReference]) => RDD[InternalRow]) = {
-    pruneFilterProjectRaw(
-      relation,
-      projects,
-      filterPredicates,
-      (requestedColumns, _, pushedFilters, a) => {
-        scanBuilder(requestedColumns, pushedFilters.toArray, a)
-      })
-  }
-
   protected def pruneFilterProjectRaw(
       relation: LogicalRelation,
       rawProjects: Seq[NamedExpression],
       filterPredicates: Seq[Expression],
+      partitions: Seq[String],
       scanBuilder: (Seq[Attribute], Seq[Expression], Seq[Filter],
-        ArrayBuffer[AttributeReference]) => RDD[InternalRow]) = {
+        ArrayBuffer[AttributeReference], Seq[String]) => RDD[InternalRow]) = {
--- End diff --

Indentation is not correct.
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1672#discussion_r157702403

--- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala ---
@@ -130,6 +130,44 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
         table.carbonTable.getTableInfo.serialize())
   }

+  /**
+   * Converts to physical RDD of carbon after pushing down applicable filters.
+   * @param relation
+   * @param projects
+   * @param filterPredicates
+   * @param scanBuilder
+   * @return
+   */
+  private def pruneFilterProject(
+      relation: LogicalRelation,
+      projects: Seq[NamedExpression],
+      filterPredicates: Seq[Expression],
+      scanBuilder: (Seq[Attribute], Array[Filter],
+        ArrayBuffer[AttributeReference], Seq[String]) => RDD[InternalRow]) = {
--- End diff --

Incorrect indentation.
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1672#discussion_r157703591

--- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala ---
@@ -130,6 +130,44 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
         table.carbonTable.getTableInfo.serialize())
   }

+  /**
+   * Converts to physical RDD of carbon after pushing down applicable filters.
+   * @param relation
+   * @param projects
+   * @param filterPredicates
+   * @param scanBuilder
--- End diff --

Can you add comments for these parameters?
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1672#discussion_r157704454

--- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala ---
@@ -130,6 +130,44 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
         table.carbonTable.getTableInfo.serialize())
   }

+  /**
+   * Converts to physical RDD of carbon after pushing down applicable filters.
+   * @param relation
+   * @param projects
+   * @param filterPredicates
+   * @param scanBuilder
+   * @return
+   */
+  private def pruneFilterProject(
+      relation: LogicalRelation,
+      projects: Seq[NamedExpression],
+      filterPredicates: Seq[Expression],
+      scanBuilder: (Seq[Attribute], Array[Filter],
+        ArrayBuffer[AttributeReference], Seq[String]) => RDD[InternalRow]) = {
+    val names = relation.catalogTable.get.partitionColumnNames
+    // Get the current partitions from table.
+    var partitions: Seq[String] = null
+    if (names.nonEmpty) {
+      val partitionSet = AttributeSet(names
+        .map(p => relation.output.find(_.name.equalsIgnoreCase(p)).get))
--- End diff --

The code style here is not good.
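As a side note for readers, one plausible reflow of the flagged expression is sketched below. It only illustrates the style fix being requested, reusing the identifiers from the diff above, and is not compilable on its own:

```scala
// Possible reflow (sketch): close AttributeSet's argument on its own line so
// the .map chain is not split mid-expression across the constructor call.
val partitionSet = AttributeSet(
  names.map(p => relation.output.find(_.name.equalsIgnoreCase(p)).get))
```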
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1672#discussion_r157704631

--- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala ---
@@ -130,6 +130,44 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
         table.carbonTable.getTableInfo.serialize())
   }

+  /**
+   * Converts to physical RDD of carbon after pushing down applicable filters.
+   * @param relation
+   * @param projects
+   * @param filterPredicates
+   * @param scanBuilder
+   * @return
+   */
+  private def pruneFilterProject(
+      relation: LogicalRelation,
+      projects: Seq[NamedExpression],
+      filterPredicates: Seq[Expression],
+      scanBuilder: (Seq[Attribute], Array[Filter],
+        ArrayBuffer[AttributeReference], Seq[String]) => RDD[InternalRow]) = {
+    val names = relation.catalogTable.get.partitionColumnNames
+    // Get the current partitions from table.
+    var partitions: Seq[String] = null
+    if (names.nonEmpty) {
+      val partitionSet = AttributeSet(names
+        .map(p => relation.output.find(_.name.equalsIgnoreCase(p)).get))
+      val partitionKeyFilters =
+        ExpressionSet(ExpressionSet(filterPredicates).filter(_.references.subsetOf(partitionSet)))
+      partitions =
+        CarbonFilters.getPartitions(
--- End diff --

Can you add a comment for the logic from line 151 to 156, explaining what it does?
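For readers following the thread, the block under discussion implements standard partition pruning: keep only those pushed-down predicates that reference partition columns exclusively, evaluate them against partition metadata, and hand the surviving partitions to the scan. The toy program below is a self-contained sketch of that idea in plain Scala; none of its names come from CarbonData, and it deliberately models partitions and predicates with simple stand-in types:

```scala
// Toy model of partition pruning: a partition is a map of partition-column
// values, and a predicate over partition columns alone can be evaluated
// without reading any data files.
object PartitionPruningSketch {
  type PartitionSpec = Map[String, String]

  // A predicate records which columns it references plus an evaluator,
  // mirroring `_.references.subsetOf(partitionSet)` in the diff above.
  final case class Predicate(references: Set[String], eval: PartitionSpec => Boolean)

  /** Keep only the partitions that satisfy every partition-only predicate. */
  def prunePartitions(
      partitionColumns: Set[String],
      predicates: Seq[Predicate],
      allPartitions: Seq[PartitionSpec]): Seq[PartitionSpec] = {
    // Analogue of narrowing filterPredicates down to partitionKeyFilters.
    val partitionKeyFilters = predicates.filter(_.references.subsetOf(partitionColumns))
    // Analogue of resolving the surviving partitions from table metadata.
    allPartitions.filter(p => partitionKeyFilters.forall(_.eval(p)))
  }

  def main(args: Array[String]): Unit = {
    val parts = Seq(Map("dt" -> "2017-12-01"), Map("dt" -> "2017-12-02"))
    val onlyDec2 = Predicate(Set("dt"), _.get("dt").contains("2017-12-02"))
    // Prints: List(Map(dt -> 2017-12-02))
    println(prunePartitions(Set("dt"), Seq(onlyDec2), parts))
  }
}
```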
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1672#discussion_r157704888

--- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala ---
@@ -395,4 +391,14 @@ object CarbonFilters {
       case _ => expressions
     }
   }
+
+  def getPartitions(partitionFilters: Seq[Expression],
--- End diff --

Can you add a comment for this function?
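The requested comment would presumably be a Scaladoc along the lines sketched below. The wording is a guess based on the call site in CarbonLateDecodeStrategy; only the first parameter is visible in the diff, so the rest are left elided:

```scala
/**
 * Resolves which partitions of the table satisfy the given partition-column
 * predicates, so that all other partitions can be skipped during the scan.
 *
 * @param partitionFilters predicates that reference partition columns only
 *                         (the remaining parameters are elided in the diff above)
 */
```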
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1672#discussion_r157704988

--- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala ---
@@ -395,4 +391,14 @@ object CarbonFilters {
       case _ => expressions
     }
   }
+
+  def getPartitions(partitionFilters: Seq[Expression],
--- End diff --

Move `partitionFilters` to the next line.
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1672

Build Success with Spark 2.2.0, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/898/
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1672

Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/2123/
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1672

SDV Build Success, Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/2407/
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1672#discussion_r157738771

--- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala ---
@@ -130,6 +130,44 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
         table.carbonTable.getTableInfo.serialize())
   }

+  /**
+   * Converts to physical RDD of carbon after pushing down applicable filters.
+   * @param relation
+   * @param projects
+   * @param filterPredicates
+   * @param scanBuilder
+   * @return
+   */
+  private def pruneFilterProject(
+      relation: LogicalRelation,
+      projects: Seq[NamedExpression],
+      filterPredicates: Seq[Expression],
+      scanBuilder: (Seq[Attribute], Array[Filter],
+        ArrayBuffer[AttributeReference], Seq[String]) => RDD[InternalRow]) = {
--- End diff --

Yes, you are right. It is difficult to track the meaning of the parameters. I also found it difficult to pass the new `partitions` parameter, but this is old code added as part of the Spark 2.0 support, and maybe we can refactor it later.
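One illustrative direction for the refactoring mentioned above would be to replace the scan builder's growing positional argument list with a named parameter object. The sketch below uses stand-in types to stay self-contained and is not code from the PR:

```scala
// Sketch: bundle the scan builder's arguments into a case class so each
// parameter is self-describing, and adding one (like `partitions`) does not
// ripple a positional change through every call site.
object ScanBuilderSketch {
  // Stand-ins for the Catalyst types (Attribute, Filter, InternalRow).
  final case class ScanRequest(
      requestedColumns: Seq[String],
      pushedFilters: Seq[String],
      partitions: Seq[String]) // the parameter this PR threads through

  type ScanBuilder = ScanRequest => Iterator[Array[Any]]
}
```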
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1672#discussion_r157739117

--- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala ---
@@ -130,6 +130,44 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
         table.carbonTable.getTableInfo.serialize())
   }

+  /**
+   * Converts to physical RDD of carbon after pushing down applicable filters.
+   * @param relation
+   * @param projects
+   * @param filterPredicates
+   * @param scanBuilder
+   * @return
+   */
+  private def pruneFilterProject(
+      relation: LogicalRelation,
+      projects: Seq[NamedExpression],
+      filterPredicates: Seq[Expression],
+      scanBuilder: (Seq[Attribute], Array[Filter],
+        ArrayBuffer[AttributeReference], Seq[String]) => RDD[InternalRow]) = {
+    val names = relation.catalogTable.get.partitionColumnNames
+    // Get the current partitions from table.
+    var partitions: Seq[String] = null
+    if (names.nonEmpty) {
+      val partitionSet = AttributeSet(names
+        .map(p => relation.output.find(_.name.equalsIgnoreCase(p)).get))
--- End diff --

OK
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1672#discussion_r157739562

--- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala ---
@@ -130,6 +130,44 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
         table.carbonTable.getTableInfo.serialize())
   }

+  /**
+   * Converts to physical RDD of carbon after pushing down applicable filters.
+   * @param relation
+   * @param projects
+   * @param filterPredicates
+   * @param scanBuilder
+   * @return
+   */
+  private def pruneFilterProject(
+      relation: LogicalRelation,
+      projects: Seq[NamedExpression],
+      filterPredicates: Seq[Expression],
+      scanBuilder: (Seq[Attribute], Array[Filter],
+        ArrayBuffer[AttributeReference], Seq[String]) => RDD[InternalRow]) = {
+    val names = relation.catalogTable.get.partitionColumnNames
+    // Get the current partitions from table.
+    var partitions: Seq[String] = null
+    if (names.nonEmpty) {
+      val partitionSet = AttributeSet(names
+        .map(p => relation.output.find(_.name.equalsIgnoreCase(p)).get))
+      val partitionKeyFilters =
+        ExpressionSet(ExpressionSet(filterPredicates).filter(_.references.subsetOf(partitionSet)))
+      partitions =
+        CarbonFilters.getPartitions(
--- End diff --

ok
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1672#discussion_r157740002

--- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala ---
@@ -130,6 +130,44 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
         table.carbonTable.getTableInfo.serialize())
   }

+  /**
+   * Converts to physical RDD of carbon after pushing down applicable filters.
+   * @param relation
+   * @param projects
+   * @param filterPredicates
+   * @param scanBuilder
--- End diff --

ok
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1672

Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/2134/