[jira] [Updated] (CARBONDATA-2085) It's different between load twice and create datamap with load again after load data and create datamap

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[jira] [Updated] (CARBONDATA-2085) It's different between load twice and create datamap with load again after load data and create datamap

Akash R Nilugal (Jira)

     [ https://issues.apache.org/jira/browse/CARBONDATA-2085?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

xubo245 updated CARBONDATA-2085:
--------------------------------
    Description:
The query results differ between the following two test cases:

test case 1: load twice, create the datamap, and then query
test case 2: load once, create the datamap, load again, and then query

{code:java}
+  test("load data into mainTable after create timeseries datamap on table 1") {
 +    sql("drop table if exists mainTable")
 +    sql(
 +      """
 +        | CREATE TABLE mainTable(
 +        |   mytime timestamp,
 +        |   name string,
 +        |   age int)
 +        | STORED BY 'org.apache.carbondata.format'
 +      """.stripMargin)
 +
 +    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
 +
 +    sql(
 +      """
 +        | create datamap agg0 on table mainTable
 +        | using 'preaggregate'
 +        | DMPROPERTIES (
 +        |   'timeseries.eventTime'='mytime',
 +        |   'timeseries.hierarchy'='second=1,minute=1,hour=1,day=1,month=1,year=1')
 +        | as select mytime, sum(age)
 +        | from mainTable
 +        | group by mytime""".stripMargin)
 +
 +    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
 +    val df = sql(
 +      """
 +        | select
 +        |   timeseries(mytime,'minute') as minuteLevel,
 +        |   sum(age) as sum
 +        | from mainTable
 +        | where timeseries(mytime,'minute')>='2016-02-23 01:01:00'
 +        | group by
 +        |   timeseries(mytime,'minute')
 +        | order by
 +        |   timeseries(mytime,'minute')
 +      """.stripMargin)
 +
 +    // only for test, it need remove before merge
 +    df.show()
 +    sql("select * from maintable_agg0_minute").show(100)
 +
 +    checkAnswer(df,
 +      Seq(Row(Timestamp.valueOf("2016-02-23 01:01:00"), 120),
 +        Row(Timestamp.valueOf("2016-02-23 01:02:00"), 280)))
 +
 +  }
 +
 +  test("load data into mainTable after create timeseries datamap on table 2") {
 +    sql("drop table if exists mainTable")
 +    sql(
 +      """
 +        | CREATE TABLE mainTable(
 +        |   mytime timestamp,
 +        |   name string,
 +        |   age int)
 +        | STORED BY 'org.apache.carbondata.format'
 +      """.stripMargin)
 +
 +    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
 +    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
 +    sql(
 +      """
 +        | create datamap agg0 on table mainTable
 +        | using 'preaggregate'
 +        | DMPROPERTIES (
 +        |   'timeseries.eventTime'='mytime',
 +        |   'timeseries.hierarchy'='second=1,minute=1,hour=1,day=1,month=1,year=1')
 +        | as select mytime, sum(age)
 +        | from mainTable
 +        | group by mytime""".stripMargin)
 +
 +
 +    val df = sql(
 +      """
 +        | select
 +        |   timeseries(mytime,'minute') as minuteLevel,
 +        |   sum(age) as sum
 +        | from mainTable
 +        | where timeseries(mytime,'minute')>='2016-02-23 01:01:00'
 +        | group by
 +        |   timeseries(mytime,'minute')
 +        | order by
 +        |   timeseries(mytime,'minute')
 +      """.stripMargin)
 +
 +    // only for test, it need remove before merge
 +    df.show()
 +    sql("select * from maintable_agg0_minute").show(100)
 +
 +
 +    checkAnswer(df,
 +      Seq(Row(Timestamp.valueOf("2016-02-23 01:01:00"), 120),
 +        Row(Timestamp.valueOf("2016-02-23 01:02:00"), 280)))
 +  }
 +
{code}



  was:
The results differ between (a) loading data twice and then creating the datamap, and (b) loading data once, creating the datamap, and then loading again.


{code:java}
 // Scenario: load the same CSV twice BEFORE creating the timeseries pre-aggregate
 // datamap, then verify the contents of the second-level aggregate table.
 test("test if timeseries load twice is successful on table creation") {
    // Recreate mainTable from scratch with (mytime, name, age).
    sql("drop table if exists mainTable")
    sql(
      """
        | CREATE TABLE mainTable(
        |   mytime timestamp,
        |   name string,
        |   age int)
        | STORED BY 'org.apache.carbondata.format'
      """.stripMargin)
    // Two loads of the same file happen before any datamap exists.
    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
    // Create the 'preaggregate' datamap agg0 keyed on mytime, with a timeseries
    // hierarchy from second up to year; it aggregates sum(age) grouped by mytime.
    sql(
      """
        | create datamap agg0 on table mainTable
        | using 'preaggregate'
        | DMPROPERTIES (
        |   'timeseries.eventTime'='mytime',
        |   'timeseries.hierarchy'='second=1,minute=1,hour=1,day=1,month=1,year=1')
        | as select mytime, sum(age)
        | from mainTable
        | group by mytime
      """.stripMargin)
    // Expect one row per distinct timestamp in maintable_agg0_second.
    // NOTE(review): the sums are presumably the per-timestamp totals across both
    // loads -- confirm against timeseriestest.csv.
    checkAnswer(sql("select * from maintable_agg0_second"),
      Seq(Row(Timestamp.valueOf("2016-02-23 01:01:30.0"), 20),
        Row(Timestamp.valueOf("2016-02-23 01:01:40.0"), 40),
        Row(Timestamp.valueOf("2016-02-23 01:01:50.0"), 60),
        Row(Timestamp.valueOf("2016-02-23 01:02:30.0"), 80),
        Row(Timestamp.valueOf("2016-02-23 01:02:40.0"), 100),
        Row(Timestamp.valueOf("2016-02-23 01:02:50.0"), 100)))
  }

  // TODO: to be fixed (the test is currently disabled via `ignore`).
  // Scenario: load once, create the timeseries pre-aggregate datamap, then load the
  // same CSV again, and verify the contents of the second-level aggregate table.
  ignore("load data into mainTable after create timeseries datamap on table") {
    // Recreate mainTable from scratch with (mytime, name, age).
    sql("drop table if exists mainTable")
    sql(
      """
        | CREATE TABLE mainTable(
        |   mytime timestamp,
        |   name string,
        |   age int)
        | STORED BY 'org.apache.carbondata.format'
      """.stripMargin)
    // First load happens before the datamap exists.
    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
    // Create the 'preaggregate' datamap agg0 keyed on mytime, with a timeseries
    // hierarchy from second up to year; it aggregates sum(age) grouped by mytime.
    sql(
      """
        | create datamap agg0 on table mainTable
        | using 'preaggregate'
        | DMPROPERTIES (
        |   'timeseries.eventTime'='mytime',
        |   'timeseries.hierarchy'='second=1,minute=1,hour=1,day=1,month=1,year=1')
        | as select mytime, sum(age)
        | from mainTable
        | group by mytime""".stripMargin)


    // Second load of the same file happens after the datamap was created.
    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")

    // Discussed: the expected rows are identical to those asserted in the
    // load-twice-before-create test (one row per timestamp).
    checkAnswer(sql("select * from maintable_agg0_second"),
      Seq(Row(Timestamp.valueOf("2016-02-23 01:01:30.0"), 20),
        Row(Timestamp.valueOf("2016-02-23 01:01:40.0"), 40),
        Row(Timestamp.valueOf("2016-02-23 01:01:50.0"), 60),
        Row(Timestamp.valueOf("2016-02-23 01:02:30.0"), 80),
        Row(Timestamp.valueOf("2016-02-23 01:02:40.0"), 100),
        Row(Timestamp.valueOf("2016-02-23 01:02:50.0"), 100)))

    // TODO: to be discussed - why does this succeed?
    // NOTE(review): the question presumably refers to the alternative expectation
    // below, which lists each timestamp twice with half the sum -- confirm with the author.
    //    checkAnswer(sql("select * from maintable_agg0_second"),
    //      Seq(Row(Timestamp.valueOf("2016-02-23 01:01:30.0"), 10),
    //        Row(Timestamp.valueOf("2016-02-23 01:01:40.0"), 20),
    //        Row(Timestamp.valueOf("2016-02-23 01:01:50.0"), 30),
    //        Row(Timestamp.valueOf("2016-02-23 01:02:30.0"), 40),
    //        Row(Timestamp.valueOf("2016-02-23 01:02:40.0"), 50),
    //        Row(Timestamp.valueOf("2016-02-23 01:02:50.0"), 50),
    //        Row(Timestamp.valueOf("2016-02-23 01:01:30.0"), 10),
    //        Row(Timestamp.valueOf("2016-02-23 01:01:40.0"), 20),
    //        Row(Timestamp.valueOf("2016-02-23 01:01:50.0"), 30),
    //        Row(Timestamp.valueOf("2016-02-23 01:02:30.0"), 40),
    //        Row(Timestamp.valueOf("2016-02-23 01:02:40.0"), 50),
    //        Row(Timestamp.valueOf("2016-02-23 01:02:50.0"), 50)))
  }

{code}



> It's different between load twice and create datamap with load again after load data and create datamap
> -------------------------------------------------------------------------------------------------------
>
>                 Key: CARBONDATA-2085
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-2085
>             Project: CarbonData
>          Issue Type: Bug
>          Components: core, spark-integration
>    Affects Versions: 1.3.0
>            Reporter: xubo245
>            Priority: Major
>             Fix For: 1.3.0
>
>          Time Spent: 20m
>  Remaining Estimate: 0h
>
> The query results differ between the following two test cases:
> test case 1: load twice, create the datamap, and then query
> test case 2: load once, create the datamap, load again, and then query
> {code:java}
> +  test("load data into mainTable after create timeseries datamap on table 1") {
>  +    sql("drop table if exists mainTable")
>  +    sql(
>  +      """
>  +        | CREATE TABLE mainTable(
>  +        |   mytime timestamp,
>  +        |   name string,
>  +        |   age int)
>  +        | STORED BY 'org.apache.carbondata.format'
>  +      """.stripMargin)
>  +
>  +    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
>  +
>  +    sql(
>  +      """
>  +        | create datamap agg0 on table mainTable
>  +        | using 'preaggregate'
>  +        | DMPROPERTIES (
>  +        |   'timeseries.eventTime'='mytime',
>  +        |   'timeseries.hierarchy'='second=1,minute=1,hour=1,day=1,month=1,year=1')
>  +        | as select mytime, sum(age)
>  +        | from mainTable
>  +        | group by mytime""".stripMargin)
>  +
>  +    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
>  +    val df = sql(
>  +      """
>  +        | select
>  +        |   timeseries(mytime,'minute') as minuteLevel,
>  +        |   sum(age) as sum
>  +        | from mainTable
>  +        | where timeseries(mytime,'minute')>='2016-02-23 01:01:00'
>  +        | group by
>  +        |   timeseries(mytime,'minute')
>  +        | order by
>  +        |   timeseries(mytime,'minute')
>  +      """.stripMargin)
>  +
>  +    // only for test, it need remove before merge
>  +    df.show()
>  +    sql("select * from maintable_agg0_minute").show(100)
>  +
>  +    checkAnswer(df,
>  +      Seq(Row(Timestamp.valueOf("2016-02-23 01:01:00"), 120),
>  +        Row(Timestamp.valueOf("2016-02-23 01:02:00"), 280)))
>  +
>  +  }
>  +
>  +  test("load data into mainTable after create timeseries datamap on table 2") {
>  +    sql("drop table if exists mainTable")
>  +    sql(
>  +      """
>  +        | CREATE TABLE mainTable(
>  +        |   mytime timestamp,
>  +        |   name string,
>  +        |   age int)
>  +        | STORED BY 'org.apache.carbondata.format'
>  +      """.stripMargin)
>  +
>  +    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
>  +    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/timeseriestest.csv' into table mainTable")
>  +    sql(
>  +      """
>  +        | create datamap agg0 on table mainTable
>  +        | using 'preaggregate'
>  +        | DMPROPERTIES (
>  +        |   'timeseries.eventTime'='mytime',
>  +        |   'timeseries.hierarchy'='second=1,minute=1,hour=1,day=1,month=1,year=1')
>  +        | as select mytime, sum(age)
>  +        | from mainTable
>  +        | group by mytime""".stripMargin)
>  +
>  +
>  +    val df = sql(
>  +      """
>  +        | select
>  +        |   timeseries(mytime,'minute') as minuteLevel,
>  +        |   sum(age) as sum
>  +        | from mainTable
>  +        | where timeseries(mytime,'minute')>='2016-02-23 01:01:00'
>  +        | group by
>  +        |   timeseries(mytime,'minute')
>  +        | order by
>  +        |   timeseries(mytime,'minute')
>  +      """.stripMargin)
>  +
>  +    // only for test, it need remove before merge
>  +    df.show()
>  +    sql("select * from maintable_agg0_minute").show(100)
>  +
>  +
>  +    checkAnswer(df,
>  +      Seq(Row(Timestamp.valueOf("2016-02-23 01:01:00"), 120),
>  +        Row(Timestamp.valueOf("2016-02-23 01:02:00"), 280)))
>  +  }
>  +
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)