Re: carbon data
Posted by lionel061201 on Nov 29, 2016; 9:56am
URL: http://apache-carbondata-dev-mailing-list-archive.168.s1.nabble.com/carbon-data-tp3305p3353.html
Thank you for the response Liang. I think I have followed the example but
it still returns error:
Data loading failed. table not found: default.carbontest
attached my code below: I read data from a hive table with HiveContext and
convert it to CarbonContext then generate the df and save to hdfs. I'm not
sure whether it's correct or not when I generate the dataframe in
sc.parallelize(sc.Files,
25) Do you have any other mothod we can use to generate DF?
object SparkConvert {
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("CarbonTest")
val sc = new SparkContext(conf)
val path = "hdfs:///user/appuser/lucao/CarbonTest_001.carbon"
val hqlContext = new HiveContext(sc)
val df = hqlContext.sql("select * from default.test_data_all")
println("the count is:" + df.count())
val cc = createCarbonContext(df.sqlContext.sparkContext, path)
writeDataFrame(cc, "CarbonTest", SaveMode.Append)
}
def createCarbonContext(sc : SparkContext, storePath : String):
CarbonContext = {
val cc = new CarbonContext(sc, storePath)
cc
}
def writeDataFrame(cc : CarbonContext, tableName : String, mode :
SaveMode) : Unit = {
import cc.implicits._
val sc = cc.sparkContext
val df = sc.parallelize(sc.files,
25).toDF(“col1”,”col2”,”col3”..."coln")
df.write
.format("carbondata")
.option("tableName", tableName)
.option("compress", "true")
.mode(mode)
.save()
}
}