Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2824 Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9203/ --- |
In reply to this post by qiuchenjian-2
Github user QiangCai commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2824#discussion_r227289015 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala --- @@ -172,33 +172,8 @@ with Serializable { dataSchema: StructType, context: TaskAttemptContext): OutputWriter = { val model = CarbonTableOutputFormat.getLoadModel(context.getConfiguration) - val isCarbonUseMultiDir = CarbonProperties.getInstance().isUseMultiTempDir - var storeLocation: Array[String] = Array[String]() - val isCarbonUseLocalDir = CarbonProperties.getInstance() - .getProperty("carbon.use.local.dir", "false").equalsIgnoreCase("true") - - val taskNumber = generateTaskNumber(path, context, model.getSegmentId) - val tmpLocationSuffix = - File.separator + "carbon" + System.nanoTime() + File.separator + taskNumber - if (isCarbonUseLocalDir) { - val yarnStoreLocations = Util.getConfiguredLocalDirs(SparkEnv.get.conf) - if (!isCarbonUseMultiDir && null != yarnStoreLocations && yarnStoreLocations.nonEmpty) { - // use single dir - storeLocation = storeLocation :+ - (yarnStoreLocations(Random.nextInt(yarnStoreLocations.length)) + tmpLocationSuffix) - if (storeLocation == null || storeLocation.isEmpty) { - storeLocation = storeLocation :+ - (System.getProperty("java.io.tmpdir") + tmpLocationSuffix) - } - } else { - // use all the yarn dirs - storeLocation = yarnStoreLocations.map(_ + tmpLocationSuffix) - } - } else { - storeLocation = - storeLocation :+ (System.getProperty("java.io.tmpdir") + tmpLocationSuffix) - } + val storeLocation = CommonUtil.getTempStoreLocations(taskNumber) --- End diff -- the old tmpLocationSuffix is different from the method CommonUtil.getTempStoreLocations. Please check whether it is ok or not --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2824 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1171/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2824 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/964/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2824 Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9228/ --- |
In reply to this post by qiuchenjian-2
Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2824#discussion_r227352023 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala --- @@ -172,33 +172,8 @@ with Serializable { dataSchema: StructType, context: TaskAttemptContext): OutputWriter = { val model = CarbonTableOutputFormat.getLoadModel(context.getConfiguration) - val isCarbonUseMultiDir = CarbonProperties.getInstance().isUseMultiTempDir - var storeLocation: Array[String] = Array[String]() - val isCarbonUseLocalDir = CarbonProperties.getInstance() - .getProperty("carbon.use.local.dir", "false").equalsIgnoreCase("true") - - val taskNumber = generateTaskNumber(path, context, model.getSegmentId) - val tmpLocationSuffix = - File.separator + "carbon" + System.nanoTime() + File.separator + taskNumber - if (isCarbonUseLocalDir) { - val yarnStoreLocations = Util.getConfiguredLocalDirs(SparkEnv.get.conf) - if (!isCarbonUseMultiDir && null != yarnStoreLocations && yarnStoreLocations.nonEmpty) { - // use single dir - storeLocation = storeLocation :+ - (yarnStoreLocations(Random.nextInt(yarnStoreLocations.length)) + tmpLocationSuffix) - if (storeLocation == null || storeLocation.isEmpty) { - storeLocation = storeLocation :+ - (System.getProperty("java.io.tmpdir") + tmpLocationSuffix) - } - } else { - // use all the yarn dirs - storeLocation = yarnStoreLocations.map(_ + tmpLocationSuffix) - } - } else { - storeLocation = - storeLocation :+ (System.getProperty("java.io.tmpdir") + tmpLocationSuffix) - } + val storeLocation = CommonUtil.getTempStoreLocations(taskNumber) --- End diff -- Yes, I also noticed this. The suffix has no meaning; it is just used to separate each thread's output. I think it was a manual mistake -- that's why we need to extract this code, to avoid problems like this. --- |
In reply to this post by qiuchenjian-2
Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2824#discussion_r227625617 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala --- @@ -172,33 +172,8 @@ with Serializable { dataSchema: StructType, context: TaskAttemptContext): OutputWriter = { val model = CarbonTableOutputFormat.getLoadModel(context.getConfiguration) - val isCarbonUseMultiDir = CarbonProperties.getInstance().isUseMultiTempDir - var storeLocation: Array[String] = Array[String]() - val isCarbonUseLocalDir = CarbonProperties.getInstance() - .getProperty("carbon.use.local.dir", "false").equalsIgnoreCase("true") - - val taskNumber = generateTaskNumber(path, context, model.getSegmentId) - val tmpLocationSuffix = - File.separator + "carbon" + System.nanoTime() + File.separator + taskNumber - if (isCarbonUseLocalDir) { - val yarnStoreLocations = Util.getConfiguredLocalDirs(SparkEnv.get.conf) - if (!isCarbonUseMultiDir && null != yarnStoreLocations && yarnStoreLocations.nonEmpty) { - // use single dir - storeLocation = storeLocation :+ - (yarnStoreLocations(Random.nextInt(yarnStoreLocations.length)) + tmpLocationSuffix) - if (storeLocation == null || storeLocation.isEmpty) { - storeLocation = storeLocation :+ - (System.getProperty("java.io.tmpdir") + tmpLocationSuffix) - } - } else { - // use all the yarn dirs - storeLocation = yarnStoreLocations.map(_ + tmpLocationSuffix) - } - } else { - storeLocation = - storeLocation :+ (System.getProperty("java.io.tmpdir") + tmpLocationSuffix) - } + val storeLocation = CommonUtil.getTempStoreLocations(taskNumber) --- End diff -- @jackylk @QiangCai I've debugged and reviewed the code again and found it works as expected: all the temp locations were cleared. The `TempStoreLocations` generated at the beginning of data loading is just the same as that at the closure of `CarbonTableOutputFormat`, in which these locations will be cleared. --- |
In reply to this post by qiuchenjian-2
|
In reply to this post by qiuchenjian-2
|
Free forum by Nabble | Edit this page |