[ https://issues.apache.org/jira/browse/CARBONDATA-2576?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Prasanna Ravichandran updated CARBONDATA-2576:
----------------------------------------------
Description:

MV is not working correctly when there are more than 3 aggregate functions in the same datamap. It works fine with up to 3 aggregate functions on the same MV.

Test queries:

scala> carbon.sql("create datamap datamap_comp_maxsumminavg using 'mv' as select empno,max(projectenddate),sum(salary),min(projectjoindate),avg(attendance) from originTable group by empno").show(200,false)
++
||
++
++

scala> carbon.sql("rebuild datamap datamap_comp_maxsumminavg").show(200,false)
++
||
++
++

scala> carbon.sql("explain select empno,max(projectenddate),sum(salary),min(projectjoindate),avg(attendance) from originTable group by empno").show(200,false)
org.apache.spark.sql.AnalysisException: expression 'datamap_comp_maxsumminavg_table.`avg_attendance`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.;;
Aggregate [origintable_empno#2925], [origintable_empno#2925 AS empno#3002, max(max_projectenddate#2926) AS max(projectenddate)#3003, sum(sum_salary#2927L) AS sum(salary)#3004L, min(min_projectjoindate#2928) AS min(projectjoindate)#3005, avg_attendance#2929 AS avg(attendance)#3006]
+- SubqueryAlias datamap_comp_maxsumminavg_table
   +- Relation[origintable_empno#2925,max_projectenddate#2926,sum_salary#2927L,min_projectjoindate#2928,avg_attendance#2929] CarbonDatasourceHadoopRelation [ Database name :default, Table name :datamap_comp_maxsumminavg_table, Schema :Some(StructType(StructField(origintable_empno,IntegerType,true), StructField(max_projectenddate,TimestampType,true), StructField(sum_salary,LongType,true), StructField(min_projectjoindate,TimestampType,true), StructField(avg_attendance,DoubleType,true))) ]

  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:39)
  at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:91)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:247)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1$5.apply(CheckAnalysis.scala:253)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1$5.apply(CheckAnalysis.scala:253)
  at scala.collection.immutable.List.foreach(List.scala:381)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:253)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$9.apply(CheckAnalysis.scala:280)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$9.apply(CheckAnalysis.scala:280)
  at scala.collection.immutable.List.foreach(List.scala:381)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:280)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:78)
  at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:78)
  at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:91)
  at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:52)
  at org.apache.spark.sql.CarbonSession.withProfiler(CarbonSession.scala:148)
  at org.apache.spark.sql.CarbonSession.sql(CarbonSession.scala:95)
  at org.apache.carbondata.mv.datamap.MVAnalyzerRule.apply(MVAnalyzerRule.scala:72)
  at org.apache.carbondata.mv.datamap.MVAnalyzerRule.apply(MVAnalyzerRule.scala:38)
  at org.apache.spark.sql.hive.CarbonAnalyzer.execute(CarbonAnalyzer.scala:46)
  at org.apache.spark.sql.hive.CarbonAnalyzer.execute(CarbonAnalyzer.scala:27)
  at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:69)
  at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:67)
  at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:50)
  at org.apache.spark.sql.execution.QueryExecution.withCachedData$lzycompute(QueryExecution.scala:73)
  at org.apache.spark.sql.execution.QueryExecution.withCachedData(QueryExecution.scala:72)
  at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:78)
  at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:78)
  at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:84)
  at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:80)
  at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:89)
  at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:89)
  at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
  at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
  at org.apache.spark.sql.execution.command.table.CarbonExplainCommand.collectProfiler(CarbonExplainCommand.scala:54)
  at org.apache.spark.sql.execution.command.table.CarbonExplainCommand.processMetadata(CarbonExplainCommand.scala:45)
  at org.apache.spark.sql.execution.command.MetadataCommand.run(package.scala:68)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:67)
  at org.apache.spark.sql.Dataset.<init>(Dataset.scala:183)
  at org.apache.spark.sql.CarbonSession$$anonfun$sql$1.apply(CarbonSession.scala:108)
  at org.apache.spark.sql.CarbonSession$$anonfun$sql$1.apply(CarbonSession.scala:97)
  at org.apache.spark.sql.CarbonSession.withProfiler(CarbonSession.scala:155)
  at org.apache.spark.sql.CarbonSession.sql(CarbonSession.scala:95)
  ... 48 elided

was: MV is not working fine if there is more than 3 aggregate function in the same datamap.
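For reference, the rewritten plan above re-aggregates max, sum and min over the MV table but projects avg_attendance bare, which Spark's CheckAnalysis rejects because the column is neither grouped nor aggregated. A minimal spark-shell sketch of that analyzer constraint, independent of CarbonData (the table and column names are taken from the plan above purely for illustration):

import spark.implicits._

// Stand-in for the MV table from the plan above.
Seq((1, 10.0), (1, 20.0), (2, 30.0))
  .toDF("origintable_empno", "avg_attendance")
  .createOrReplaceTempView("datamap_comp_maxsumminavg_table")

// Fails with the same AnalysisException: avg_attendance is neither
// in the group by nor wrapped in an aggregate function.
// spark.sql("select origintable_empno, avg_attendance from datamap_comp_maxsumminavg_table group by origintable_empno").show()

// A shape the analyzer accepts: wrap the pre-aggregated value in first().
spark.sql("""select origintable_empno, first(avg_attendance) as `avg(attendance)`
             from datamap_comp_maxsumminavg_table
             group by origintable_empno""").show()

Note that first() is only sound here because the MV's group-by matches the query's group-by exactly, so each group has a single stored row.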
> MV Datamap - MV is not working fine if there is more than 3 aggregate function in the same datamap.
> ---------------------------------------------------------------------------------------------------
>
>                 Key: CARBONDATA-2576
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-2576
>             Project: CarbonData
>          Issue Type: Bug
>          Components: data-query
>            Reporter: Prasanna Ravichandran
>            Priority: Minor
>              Labels: CARBONDATA., MV, Materialistic_Views
>         Attachments: From 4th aggregate function -error shown.docx, data.csv
>
> MV is not working correctly when there are more than 3 aggregate functions in the same datamap. It works fine with up to 3 aggregate functions on the same MV. The test queries and full stack trace are given in the description above.
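A likely direction for a fix (my assumption, not the committed CarbonData change): a stored average cannot be rolled up on its own, so materialized-view implementations commonly store sum and count instead and rewrite avg(x) as sum(sum_x) / sum(count_x) at query time. A spark-shell sketch of that rewrite shape, with hypothetical column and table names:

import spark.implicits._

// Hypothetical MV layout keeping the building blocks of avg,
// possibly several rows per group (e.g. one per loaded segment).
Seq((1, 30.0, 2L), (1, 10.0, 1L), (2, 50.0, 2L))
  .toDF("origintable_empno", "sum_attendance", "count_attendance")
  .createOrReplaceTempView("mv_sketch_table")

// avg rolls up correctly from the parts, even when a group spans several rows:
spark.sql("""select origintable_empno,
                    sum(sum_attendance) / sum(count_attendance) as `avg(attendance)`
             from mv_sketch_table
             group by origintable_empno""").show()
// empno 1 -> 40.0 / 3 ≈ 13.33, empno 2 -> 50.0 / 2 = 25.0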
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)