Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2128 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/3556/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2128 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/4782/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2128 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/4284/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2128 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/3564/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2128 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/4788/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2128 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/4290/ --- |
In reply to this post by qiuchenjian-2
Github user rahulforallp commented on the issue:
https://github.com/apache/carbondata/pull/2128 retest this please --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2128 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/3577/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2128 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/4800/ --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2128#discussion_r180034323 --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala --- @@ -151,13 +152,88 @@ object CarbonStore { } } } finally { + if (currentTablePartitions.equals(None)) { + cleanUpPartitionFoldersRecurssively(carbonTable, List.empty[PartitionSpec]) + } else { + cleanUpPartitionFoldersRecurssively(carbonTable, currentTablePartitions.get.toList) + } + if (carbonCleanFilesLock != null) { CarbonLockUtil.fileUnlock(carbonCleanFilesLock, LockUsage.CLEAN_FILES_LOCK) } } LOGGER.audit(s"Clean files operation is success for $dbName.$tableName.") } + /** + * delete partition folders recurssively + * + * @param carbonTable + * @param partitionSpecList + */ + def cleanUpPartitionFoldersRecurssively(carbonTable: CarbonTable, + partitionSpecList: List[PartitionSpec]): Unit = { + if (carbonTable != null) { + val loadMetadataDetails = SegmentStatusManager --- End diff -- 1. Partition folders cannot be deleted, as there is no way to check if a new data load is using them. 2. Should not take multiple snapshots of the file system during clean files. 3. Partition location will be valid for partitions inside the table path also; those folders should not be scanned twice. 4. The CarbonFile interface should be used for filesystem operations. --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2128 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/4340/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2128 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/3649/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2128 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/4871/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2128 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/4878/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2128 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/3655/ --- |
In reply to this post by qiuchenjian-2
Github user rahulforallp commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2128#discussion_r180079486 --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala --- @@ -151,13 +152,88 @@ object CarbonStore { } } } finally { + if (currentTablePartitions.equals(None)) { + cleanUpPartitionFoldersRecurssively(carbonTable, List.empty[PartitionSpec]) + } else { + cleanUpPartitionFoldersRecurssively(carbonTable, currentTablePartitions.get.toList) + } + if (carbonCleanFilesLock != null) { CarbonLockUtil.fileUnlock(carbonCleanFilesLock, LockUsage.CLEAN_FILES_LOCK) } } LOGGER.audit(s"Clean files operation is success for $dbName.$tableName.") } + /** + * delete partition folders recurssively + * + * @param carbonTable + * @param partitionSpecList + */ + def cleanUpPartitionFoldersRecurssively(carbonTable: CarbonTable, + partitionSpecList: List[PartitionSpec]): Unit = { + if (carbonTable != null) { + val loadMetadataDetails = SegmentStatusManager --- End diff -- 1. Partition folders cannot be deleted, as there is no way to check if a new data load is using them. ==> Done 2. Should not take multiple snapshots of the file system during clean files. ==> Earlier we were not taking a snapshot recursively, so it is required here for the partition folders. 3. Partition location will be valid for partitions inside the table path also; those folders should not be scanned twice. ==> Done 4. The CarbonFile interface should be used for filesystem operations. ==> Done --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2128#discussion_r180099123 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFile.java --- @@ -156,6 +158,25 @@ public boolean delete() { } + @Override + public CarbonFile[] listFiles(Boolean recurssive) { + if (!file.isDirectory()) { + return new CarbonFile[0]; + } + String[] filter = null; + Collection<File> fileCollection = FileUtils.listFiles(file, null, true); + File[] files = fileCollection.toArray(new File[fileCollection.size()]); + if (files == null) { + return new CarbonFile[0]; + } + CarbonFile[] carbonFiles = new CarbonFile[files.length]; --- End diff -- directly copy into array --- |
In reply to this post by qiuchenjian-2
Github user gvramana commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2128#discussion_r180109707 --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala --- @@ -151,13 +153,82 @@ object CarbonStore { } } } finally { + if (currentTablePartitions.equals(None)) { + cleanUpPartitionFoldersRecurssively(carbonTable, List.empty[PartitionSpec]) + } else { + cleanUpPartitionFoldersRecurssively(carbonTable, currentTablePartitions.get.toList) + } + if (carbonCleanFilesLock != null) { CarbonLockUtil.fileUnlock(carbonCleanFilesLock, LockUsage.CLEAN_FILES_LOCK) } } LOGGER.audit(s"Clean files operation is success for $dbName.$tableName.") } + /** + * delete partition folders recurssively + * + * @param carbonTable + * @param partitionSpecList + */ + def cleanUpPartitionFoldersRecurssively(carbonTable: CarbonTable, + partitionSpecList: List[PartitionSpec]): Unit = { + if (carbonTable != null) { + val loadMetadataDetails = SegmentStatusManager + .readLoadMetadata(carbonTable.getMetadataPath) + + val fileType = FileFactory.getFileType(carbonTable.getTablePath) + val carbonFile = FileFactory.getCarbonFile(carbonTable.getTablePath, fileType) + + // list all files from table path + val listOfDefaultPartFilesIterator = carbonFile.listFiles(true) + loadMetadataDetails.foreach { metadataDetail => + if (metadataDetail.getSegmentStatus.equals(SegmentStatus.MARKED_FOR_DELETE) && + metadataDetail.getSegmentFile == null) { + val loadStartTime: Long = metadataDetail.getLoadStartTime + // delete all files of @loadStartTime from tablepath + cleanPartitionFolder(listOfDefaultPartFilesIterator, loadStartTime) + partitionSpecList.foreach { + partitionSpec => + val partitionLocation = partitionSpec.getLocation + // For partition folder outside the tablePath + if (!partitionLocation.toString.startsWith(carbonTable.getTablePath)) { + val fileType = FileFactory.getFileType(partitionLocation.toString) + val partitionCarbonFile = FileFactory + .getCarbonFile(partitionLocation.toString, fileType) 
+ // list all files from partitionLoacation + val listOfExternalPartFilesIterator = partitionCarbonFile.listFiles(true) + // delete all files of @loadStartTime from externalPath + cleanPartitionFolder(listOfExternalPartFilesIterator, loadStartTime) + } + } + } + } + } + + /** + * + * @param carbonFiles + * @param timestamp + */ + private def cleanPartitionFolder(carbonFiles: Array[CarbonFile], + timestamp: Long): Unit = { + carbonFiles.foreach { + carbonFile => + val filePath = carbonFile.getPath + val fileName = carbonFile.getName + if (fileName.lastIndexOf("-") > 0 && fileName.lastIndexOf(".") > 0) { + if (fileName.substring(fileName.lastIndexOf("-") + 1, fileName.lastIndexOf(".")) --- End diff -- The getCarbonFileTimeStamp function can be moved to CarbonTablePath. Also, change the function name to cleanCarbonFilesInFolder. --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2128 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/3666/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2128 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/4889/ --- |
Free forum by Nabble | Edit this page |