[ https://issues.apache.org/jira/browse/CARBONDATA-279?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15529820#comment-15529820 ] ASF GitHub Bot commented on CARBONDATA-279: ------------------------------------------- Github user jackylk commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/203#discussion_r80922605 --- Diff: integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala --- @@ -462,3 +478,133 @@ class CarbonDataLoadRDD[K, V]( } } +class CarbonRDDDataLoadRDD[K, V]( + sc: SparkContext, + result: DataLoadResult[K, V], + carbonLoadModel: CarbonLoadModel, + var storeLocation: String, + hdfsStoreLocation: String, + kettleHomePath: String, + columinar: Boolean, + loadCount: Integer, + tableCreationTime: Long, + schemaLastUpdatedTime: Long, + prev: RDD[Row]) + extends RDD[(K, V)](prev) + with Logging { + + sc.setLocalProperty("spark.scheduler.pool", "DDL") + + @DeveloperApi + override def compute(theSplit: Partition, context: TaskContext): Iterator[(K, V)] = { + val LOGGER = LogServiceFactory.getLogService(this.getClass.getName) + val resultIter = new Iterator[(K, V)] { + var partitionID = "0" + val loadMetadataDetails = new LoadMetadataDetails() + var uniqueLoadStatusId = carbonLoadModel.getTableName + CarbonCommonConstants.UNDERSCORE + + theSplit.index + try { + loadMetadataDetails.setPartitionCount(partitionID) + loadMetadataDetails.setLoadStatus(CarbonCommonConstants.STORE_LOADSTATUS_FAILURE) + carbonLoadModel.setPartitionId(partitionID) + carbonLoadModel.setSegmentId(String.valueOf(loadCount)) + carbonLoadModel.setTaskNo(String.valueOf(theSplit.index)) + + storeLocation = CarbonDataLoadRDD.initialize(carbonLoadModel, theSplit.index) + loadMetadataDetails.setLoadStatus(CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS) + val rddIteratorKey = UUID.randomUUID().toString + try{ + RddInputUtils.put(rddIteratorKey, + new 
RddIterator(firstParent[Row].iterator(theSplit, context), carbonLoadModel)) + carbonLoadModel.setRddIteratorKey(rddIteratorKey) --- End diff -- Why can't you just set the iterator object itself in the carbonLoadModel instead of putting it in a global map? > [DataLoading]Save a DataFrame to CarbonData file without writing CSV file > ------------------------------------------------------------------------- > > Key: CARBONDATA-279 > URL: https://issues.apache.org/jira/browse/CARBONDATA-279 > Project: CarbonData > Issue Type: Improvement > Affects Versions: 0.1.0-incubating > Reporter: QiangCai > Assignee: QiangCai > Priority: Minor > Fix For: 0.2.0-incubating > > > Directly save a DataFrame to a CarbonData file without first writing a CSV file -- This message was sent by Atlassian JIRA (v6.3.4#6332) |
Free forum by Nabble | Edit this page |