[ https://issues.apache.org/jira/browse/CARBONDATA-3484?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ravindra Pesala updated CARBONDATA-3484: ---------------------------------------- Fix Version/s: (was: 1.6.0) > Unable to read data from carbontable, after successful writing in same or new carbonsession > -------------------------------------------------------------------------------------------- > > Key: CARBONDATA-3484 > URL: https://issues.apache.org/jira/browse/CARBONDATA-3484 > Project: CarbonData > Issue Type: Bug > Components: data-query > Affects Versions: 1.6.0 > Environment: ubuntu > Reporter: anshul > Priority: Critical > Labels: newbie > Original Estimate: 24h > Remaining Estimate: 24h > > I have read a CSV from my local system and written it as a carbontable to an S3 location using s3n > public static void main(String args[]) { > String colNames = ""; > SparkSession spark = null; > SparkSession carbon = null; > String storeLocation = "s3n://accesskey:secretkey@bucketnames3"; > SparkConf config = new SparkConf(); > config.setMaster("local[2]"); > config.set("javax.jdo.option.ConnectionDriverName", "org.postgresql.Driver"); > config.set("javax.jdo.option.ConnectionPassword", "postgres"); > config.set("javax.jdo.option.ConnectionUserName", "postgres"); > config.set("hive.exec.dynamic.partition.mode", "nonstrict"); > config.set("hive.exec.dynamic.partition", "true"); > config.set("hive.exec.max.dynamic.partitions", "2556"); > config.set("hive.exec.max.dynamic.partitions.pernode", "2556"); > config.set("carbon.number.of.cores.while.loading", "1"); > config.set("carbon.sort.temp.compressor", "SNAPPY"); > config.set("carbon.sort.size", "5000"); > config.set("carbon.sort.file.write.buffer.size", "500"); > config.set("carbon.merge.sort.prefetch", "false"); > config.set("carbon.sort.intermediate.files.limit", "10"); > config.set("enable.unsafe.sort", "true"); > config.set("spark.kryo.unsafe", "true"); > config.set("hive.metastore.uris", 
"thrift://localhost:9083"); > spark = SparkSession.builder().appName("CarbonDataReader").config(config).enableHiveSupport().getOrCreate(); > carbon = CarbonSession.CarbonBuilder(spark.builder()).getOrCreateCarbonSession(storeLocation, > "jdbc:postgresql://localhost:5432/carbonmetastore"); > carbon.sparkContext().hadoopConfiguration().set("fs.s3n.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem"); > carbon.sparkContext().hadoopConfiguration().set("fs.s3n.awsAccessKeyId", "xxxx"); > carbon.sparkContext().hadoopConfiguration().set("fs.s3n.awsSecretAccessKey", "xxxxxxx"); > > Dataset<Row> ds = carbon.read().format("carbondata").option("header", "true").option("inferSchema", "true") > .csv("/home/anshul.jain/Downloads/datasets/EMP_Age.csv"); > ds.registerTempTable("temp_emp_age_test"); > carbon.sql("describe formatted emp_age_test").show(100, false); > DataFrameWriter<Row> dfw = ds.write().format(MaterializedViewConstants.CARBONFORMAT).option(MetadataConstants.TABLE_NAME, "emp_age_test") > .option("bad_records_logger_enable", false); > dfw.mode(SaveMode.Overwrite).save(); > > carbon.sql("select * from emp_age_test").show(); > -- This message was sent by Atlassian Jira (v8.3.2#803003) |
Free forum by Nabble | Edit this page |