Apache CarbonData Dev Mailing List archive › Apache CarbonData JIRA issues

[jira] [Updated] (CARBONDATA-2085) It's different between load twice and create datamap with load again after load data and create datamap

Classic

List

Threaded

1 message

Akash R Nilugal (Jira)

[jira] [Updated] (CARBONDATA-2085) It's different between load twice and create datamap with load again after load data and create datamap

[ https://issues.apache.org/jira/browse/CARBONDATA-2085?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

xubo245 updated CARBONDATA-2085:
--------------------------------
Description:
The behavior differs between the following two test cases:

test case 1: load once, create the datamap, load again, and then query
test case 2: load twice, create the datamap, and then query

{code:java}
+ test("load data into mainTable after create timeseries datamap on table 1") {
+ sql("drop table if exists mainTable")
+ sql(
+ """
+ | CREATE TABLE mainTable(
+ | mytime timestamp,
+ | name string,
+ | age int)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+
+ sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
+
+ sql(
+ """
+ | create datamap agg0 on table mainTable
+ | using 'preaggregate'
+ | DMPROPERTIES (
+ | 'timeseries.eventTime'='mytime',
+ | 'timeseries.hierarchy'='second=1,minute=1,hour=1,day=1,month=1,year=1')
+ | as select mytime, sum(age)
+ | from mainTable
+ | group by mytime""".stripMargin)
+
+ sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
+ val df = sql(
+ """
+ | select
+ | timeseries(mytime,'minute') as minuteLevel,
+ | sum(age) as sum
+ | from mainTable
+ | where timeseries(mytime,'minute')>='2016-02-23 01:01:00'
+ | group by
+ | timeseries(mytime,'minute')
+ | order by
+ | timeseries(mytime,'minute')
+ """.stripMargin)
+
+ // only for test, it need remove before merge
+ df.show()
+ sql("select * from maintable_agg0_minute").show(100)
+
+ checkAnswer(df,
+ Seq(Row(Timestamp.valueOf("2016-02-23 01:01:00"), 120),
+ Row(Timestamp.valueOf("2016-02-23 01:02:00"), 280)))
+
+ }
+
+ test("load data into mainTable after create timeseries datamap on table 2") {
+ sql("drop table if exists mainTable")
+ sql(
+ """
+ | CREATE TABLE mainTable(
+ | mytime timestamp,
+ | name string,
+ | age int)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+
+ sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
+ sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
+ sql(
+ """
+ | create datamap agg0 on table mainTable
+ | using 'preaggregate'
+ | DMPROPERTIES (
+ | 'timeseries.eventTime'='mytime',
+ | 'timeseries.hierarchy'='second=1,minute=1,hour=1,day=1,month=1,year=1')
+ | as select mytime, sum(age)
+ | from mainTable
+ | group by mytime""".stripMargin)
+
+
+ val df = sql(
+ """
+ | select
+ | timeseries(mytime,'minute') as minuteLevel,
+ | sum(age) as sum
+ | from mainTable
+ | where timeseries(mytime,'minute')>='2016-02-23 01:01:00'
+ | group by
+ | timeseries(mytime,'minute')
+ | order by
+ | timeseries(mytime,'minute')
+ """.stripMargin)
+
+ // only for test, it need remove before merge
+ df.show()
+ sql("select * from maintable_agg0_minute").show(100)
+
+
+ checkAnswer(df,
+ Seq(Row(Timestamp.valueOf("2016-02-23 01:01:00"), 120),
+ Row(Timestamp.valueOf("2016-02-23 01:02:00"), 280)))
+ }
+
{code}

was:
It's different between load twice and create datamap with load again after load data and create datamap

{code:java}
test("test if timeseries load twice is successful on table creation") {
sql("drop table if exists mainTable")
sql(
"""
| CREATE TABLE mainTable(
| mytime timestamp,
| name string,
| age int)
| STORED BY 'org.apache.carbondata.format'
""".stripMargin)
sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
sql(
"""
| create datamap agg0 on table mainTable
| using 'preaggregate'
| DMPROPERTIES (
| 'timeseries.eventTime'='mytime',
| 'timeseries.hierarchy'='second=1,minute=1,hour=1,day=1,month=1,year=1')
| as select mytime, sum(age)
| from mainTable
| group by mytime
""".stripMargin)
checkAnswer(sql("select * from maintable_agg0_second"),
Seq(Row(Timestamp.valueOf("2016-02-23 01:01:30.0"), 20),
Row(Timestamp.valueOf("2016-02-23 01:01:40.0"), 40),
Row(Timestamp.valueOf("2016-02-23 01:01:50.0"), 60),
Row(Timestamp.valueOf("2016-02-23 01:02:30.0"), 80),
Row(Timestamp.valueOf("2016-02-23 01:02:40.0"), 100),
Row(Timestamp.valueOf("2016-02-23 01:02:50.0"), 100)))
}

// TODO: to be fixed
ignore("load data into mainTable after create timeseries datamap on table") {
sql("drop table if exists mainTable")
sql(
"""
| CREATE TABLE mainTable(
| mytime timestamp,
| name string,
| age int)
| STORED BY 'org.apache.carbondata.format'
""".stripMargin)
sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
sql(
"""
| create datamap agg0 on table mainTable
| using 'preaggregate'
| DMPROPERTIES (
| 'timeseries.eventTime'='mytime',
| 'timeseries.hierarchy'='second=1,minute=1,hour=1,day=1,month=1,year=1')
| as select mytime, sum(age)
| from mainTable
| group by mytime""".stripMargin)

sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")

// Discussed
checkAnswer(sql("select * from maintable_agg0_second"),
Seq(Row(Timestamp.valueOf("2016-02-23 01:01:30.0"), 20),
Row(Timestamp.valueOf("2016-02-23 01:01:40.0"), 40),
Row(Timestamp.valueOf("2016-02-23 01:01:50.0"), 60),
Row(Timestamp.valueOf("2016-02-23 01:02:30.0"), 80),
Row(Timestamp.valueOf("2016-02-23 01:02:40.0"), 100),
Row(Timestamp.valueOf("2016-02-23 01:02:50.0"), 100)))

//TODO: to be discussed, why is success?
// checkAnswer(sql("select * from maintable_agg0_second"),
// Seq(Row(Timestamp.valueOf("2016-02-23 01:01:30.0"), 10),
// Row(Timestamp.valueOf("2016-02-23 01:01:40.0"), 20),
// Row(Timestamp.valueOf("2016-02-23 01:01:50.0"), 30),
// Row(Timestamp.valueOf("2016-02-23 01:02:30.0"), 40),
// Row(Timestamp.valueOf("2016-02-23 01:02:40.0"), 50),
// Row(Timestamp.valueOf("2016-02-23 01:02:50.0"), 50),
// Row(Timestamp.valueOf("2016-02-23 01:01:30.0"), 10),
// Row(Timestamp.valueOf("2016-02-23 01:01:40.0"), 20),
// Row(Timestamp.valueOf("2016-02-23 01:01:50.0"), 30),
// Row(Timestamp.valueOf("2016-02-23 01:02:30.0"), 40),
// Row(Timestamp.valueOf("2016-02-23 01:02:40.0"), 50),
// Row(Timestamp.valueOf("2016-02-23 01:02:50.0"), 50)))
}

{code}

> It's different between load twice and create datamap with load again after load data and create datamap
> -------------------------------------------------------------------------------------------------------
>
> Key: CARBONDATA-2085
> URL: https://issues.apache.org/jira/browse/CARBONDATA-2085
> Project: CarbonData
> Issue Type: Bug
> Components: core, spark-integration
> Affects Versions: 1.3.0
> Reporter: xubo245
> Priority: Major
> Fix For: 1.3.0
>
> Time Spent: 20m
> Remaining Estimate: 0h
>
> The behavior differs between the following two test cases:
> test case 1: load once, create the datamap, load again, and then query
> test case 2: load twice, create the datamap, and then query
> {code:java}
> + test("load data into mainTable after create timeseries datamap on table 1") {
> + sql("drop table if exists mainTable")
> + sql(
> + """
> + | CREATE TABLE mainTable(
> + | mytime timestamp,
> + | name string,
> + | age int)
> + | STORED BY 'org.apache.carbondata.format'
> + """.stripMargin)
> +
> + sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
> +
> + sql(
> + """
> + | create datamap agg0 on table mainTable
> + | using 'preaggregate'
> + | DMPROPERTIES (
> + | 'timeseries.eventTime'='mytime',
> + | 'timeseries.hierarchy'='second=1,minute=1,hour=1,day=1,month=1,year=1')
> + | as select mytime, sum(age)
> + | from mainTable
> + | group by mytime""".stripMargin)
> +
> + sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
> + val df = sql(
> + """
> + | select
> + | timeseries(mytime,'minute') as minuteLevel,
> + | sum(age) as sum
> + | from mainTable
> + | where timeseries(mytime,'minute')>='2016-02-23 01:01:00'
> + | group by
> + | timeseries(mytime,'minute')
> + | order by
> + | timeseries(mytime,'minute')
> + """.stripMargin)
> +
> + // only for test, it need remove before merge
> + df.show()
> + sql("select * from maintable_agg0_minute").show(100)
> +
> + checkAnswer(df,
> + Seq(Row(Timestamp.valueOf("2016-02-23 01:01:00"), 120),
> + Row(Timestamp.valueOf("2016-02-23 01:02:00"), 280)))
> +
> + }
> +
> + test("load data into mainTable after create timeseries datamap on table 2") {
> + sql("drop table if exists mainTable")
> + sql(
> + """
> + | CREATE TABLE mainTable(
> + | mytime timestamp,
> + | name string,
> + | age int)
> + | STORED BY 'org.apache.carbondata.format'
> + """.stripMargin)
> +
> + sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
> + sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
> + sql(
> + """
> + | create datamap agg0 on table mainTable
> + | using 'preaggregate'
> + | DMPROPERTIES (
> + | 'timeseries.eventTime'='mytime',
> + | 'timeseries.hierarchy'='second=1,minute=1,hour=1,day=1,month=1,year=1')
> + | as select mytime, sum(age)
> + | from mainTable
> + | group by mytime""".stripMargin)
> +
> +
> + val df = sql(
> + """
> + | select
> + | timeseries(mytime,'minute') as minuteLevel,
> + | sum(age) as sum
> + | from mainTable
> + | where timeseries(mytime,'minute')>='2016-02-23 01:01:00'
> + | group by
> + | timeseries(mytime,'minute')
> + | order by
> + | timeseries(mytime,'minute')
> + """.stripMargin)
> +
> + // only for test, it need remove before merge
> + df.show()
> + sql("select * from maintable_agg0_minute").show(100)
> +
> +
> + checkAnswer(df,
> + Seq(Row(Timestamp.valueOf("2016-02-23 01:01:00"), 120),
> + Row(Timestamp.valueOf("2016-02-23 01:02:00"), 280)))
> + }
> +
> {code}

--
This message was sent by Atlassian JIRA
(v7.6.3#76005)