Apache CarbonData Dev Mailing List archive

Re: carbon data

Posted by lionel061201 on Nov 29, 2016; 9:56am
URL: http://apache-carbondata-dev-mailing-list-archive.168.s1.nabble.com/carbon-data-tp3305p3353.html

Thank you for the response Liang. I think I have followed the example but
it still returns error:
Data loading failed. table not found: default.carbontest
attached my code below: I read data from a hive table with HiveContext and
convert it to CarbonContext then generate the df and save to hdfs. I'm not
sure whether it's correct or not when I generate the dataframe in
sc.parallelize(sc.Files,
25) Do you have any other mothod we can use to generate DF?

object SparkConvert {

def main(args: Array[String]): Unit = {

val conf = new SparkConf().setAppName("CarbonTest")

val sc = new SparkContext(conf)

val path = "hdfs:///user/appuser/lucao/CarbonTest_001.carbon"

val hqlContext = new HiveContext(sc)

val df = hqlContext.sql("select * from default.test_data_all")

println("the count is:" + df.count())

val cc = createCarbonContext(df.sqlContext.sparkContext, path)

writeDataFrame(cc, "CarbonTest", SaveMode.Append)

}

def createCarbonContext(sc : SparkContext, storePath : String):
CarbonContext = {

val cc = new CarbonContext(sc, storePath)

cc

}

def writeDataFrame(cc : CarbonContext, tableName : String, mode :
SaveMode) : Unit = {

import cc.implicits._

val sc = cc.sparkContext

val df = sc.parallelize(sc.files,
25).toDF(“col1”,”col2”,”col3”..."coln")

df.write

.format("carbondata")

.option("tableName", tableName)

.option("compress", "true")

.mode(mode)

.save()

}

}