Login  Register

[GitHub] [carbondata] akashrn5 commented on a change in pull request #4072: [CARBONDATA-4110] Support clean files dry run operation and show statistics after clean files operation

Posted by GitBox on Mar 09, 2021; 7:25am
URL: http://apache-carbondata-dev-mailing-list-archive.168.s1.nabble.com/GitHub-carbondata-vikramahuja1001-opened-a-new-pull-request-4072-WIP-Clean-files-phase2-tp105322p106582.html


akashrn5 commented on a change in pull request #4072:
URL: https://github.com/apache/carbondata/pull/4072#discussion_r590016222



##########
File path: integration/spark/src/main/scala/org/apache/carbondata/trash/DataTrashManager.scala
##########
@@ -87,13 +106,53 @@ object DataTrashManager {
     }
   }
 
-  private def checkAndCleanTrashFolder(carbonTable: CarbonTable, isForceDelete: Boolean): Unit = {
+  /**
+   * Checks the size of the segment files as well as datafiles, this method is used before and after
+   * clean files operation to check how much space is actually freed, during the operation.
+   */
+  def getSizeSnapshot(carbonTable: CarbonTable): Long = {
+    val metadataDetails = SegmentStatusManager.readLoadMetadata(carbonTable.getMetadataPath)
+    var size: Long = 0
+    val segmentFileLocation = CarbonTablePath.getSegmentFilesLocation(carbonTable.getTablePath)
+    if (FileFactory.isFileExist(segmentFileLocation)) {
+      size += FileFactory.getDirectorySize(segmentFileLocation)
+    }
+    metadataDetails.foreach(oneLoad =>
+      if (oneLoad.getVisibility.toBoolean) {
+        size += calculateSegmentSizeForOneLoad(carbonTable, oneLoad, metadataDetails)
+      }
+    )
+    size
+  }
+
+  /**
+   * Method to handle the Clean files dry run operation
+   */
+  def cleanFilesDryRunOperation (
+      carbonTable: CarbonTable,
+      isForceDelete: Boolean,
+      cleanStaleInProgress: Boolean,
+      showStats: Boolean): (Long, Long) = {
+    // get size freed from the trash folder
+    val trashFolderSizeStats = checkAndCleanTrashFolder(carbonTable, isForceDelete,
+        isDryRun = true, showStats)
+    // get size that will be deleted (MFD, COmpacted, Inprogress segments)
+    val expiredSegmentsSizeStats = dryRunOnExpiredSegments(carbonTable, isForceDelete,
+      cleanStaleInProgress)
+    (trashFolderSizeStats._1 + expiredSegmentsSizeStats._1, trashFolderSizeStats._2 +
+        expiredSegmentsSizeStats._2)
+  }
+
+  private def checkAndCleanTrashFolder(carbonTable: CarbonTable, isForceDelete: Boolean,
+      isDryRun: Boolean, showStats: Boolean): (Long, Long) = {
     if (isForceDelete) {
       // empty the trash folder
-      TrashUtil.emptyTrash(carbonTable.getTablePath)
+      val a = TrashUtil.emptyTrash(carbonTable.getTablePath, isDryRun, showStats)

Review comment:
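       Please give this variable a proper, descriptive name instead of `a`, so it is clear what the value returned by `TrashUtil.emptyTrash` represents.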




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]