ShreelekhyaG commented on a change in pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#discussion_r597428016 ########## File path: processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java ########## @@ -190,7 +200,7 @@ public boolean execute(List<RawResultIterator> unsortedResultIteratorList, LOGGER.error(e.getLocalizedMessage(), e); throw e; } finally { - if (partitionSpec != null) { Review comment: Instead of passing boolean value, cant we directly get `CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT `from properties? and the if check should be - if merge index not enabled, then write segment file. As below, ``` boolean isMergeIndexEnable = Boolean.parseBoolean(CarbonProperties.getInstance().getProperty( CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT_DEFAULT)); if (partitionSpec != null && !isMergeIndexEnable) { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
liuhe0702 commented on a change in pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#discussion_r597462077 ########## File path: integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonTableCompactor.scala ########## @@ -296,26 +299,46 @@ class CarbonTableCompactor( if (finalMergeStatus) { val mergedLoadNumber = CarbonDataMergerUtil.getLoadNumberFromLoadName(mergedLoadName) - var segmentFilesForIUDCompact = new util.ArrayList[Segment]() var segmentFileName: String = null + + val isMergeIndexEnabled = CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, + CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT_DEFAULT).toBoolean + + if (compactionType != CompactionType.IUD_DELETE_DELTA && isMergeIndexEnabled) { + MergeIndexUtil.mergeIndexFilesOnCompaction(compactionCallableModel) + } + if (carbonTable.isHivePartitionTable) { - val readPath = - CarbonTablePath.getSegmentFilesLocation(carbonLoadModel.getTablePath) + - CarbonCommonConstants.FILE_SEPARATOR + carbonLoadModel.getFactTimeStamp + ".tmp" - // Merge all partition files into a single file. - segmentFileName = - mergedLoadNumber + "_" + carbonLoadModel.getFactTimeStamp - val segmentFile = SegmentFileStore - .mergeSegmentFiles(readPath, - segmentFileName, - CarbonTablePath.getSegmentFilesLocation(carbonLoadModel.getTablePath)) - if (segmentFile != null) { - SegmentFileStore - .moveFromTempFolder(segmentFile, - carbonLoadModel.getFactTimeStamp + ".tmp", - carbonLoadModel.getTablePath) + if (isMergeIndexEnabled) { + val segmentTmpFileName = carbonLoadModel.getFactTimeStamp + CarbonTablePath.SEGMENT_EXT + segmentFileName = mergedLoadNumber + "_" + segmentTmpFileName + val segmentTmpFile = FileFactory.getCarbonFile( + CarbonTablePath.getSegmentFilePath(carbonTable.getTablePath, segmentTmpFileName)) + if (!segmentTmpFile.renameForce( + CarbonTablePath.getSegmentFilePath(carbonTable.getTablePath, segmentFileName))) { + throw new Exception(s"Rename segment file from ${segmentTmpFileName} " + + s"to ${segmentFileName} failed.") + } + } else { + val readPath = + CarbonTablePath.getSegmentFilesLocation(carbonLoadModel.getTablePath) + + CarbonCommonConstants.FILE_SEPARATOR + carbonLoadModel.getFactTimeStamp + ".tmp" + // Merge all partition files into a single file. + segmentFileName = + mergedLoadNumber + "_" + carbonLoadModel.getFactTimeStamp + val segmentFile = SegmentFileStore Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
liuhe0702 commented on a change in pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#discussion_r597462247 ########## File path: processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java ########## @@ -149,16 +149,26 @@ private CarbonColumn[] noDicAndComplexColumns; + private boolean needSegmentFile; Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
liuhe0702 commented on a change in pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#discussion_r597382404 ########## File path: integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonTableCompactor.scala ########## @@ -296,26 +299,50 @@ class CarbonTableCompactor( if (finalMergeStatus) { val mergedLoadNumber = CarbonDataMergerUtil.getLoadNumberFromLoadName(mergedLoadName) - var segmentFilesForIUDCompact = new util.ArrayList[Segment]() var segmentFileName: String = null + + val mergeIndex = CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, + CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT_DEFAULT).toBoolean + + if (compactionType != CompactionType.IUD_DELETE_DELTA && mergeIndex) { + MergeIndexUtil.mergeIndexFilesOnCompaction(compactionCallableModel) + } + if (carbonTable.isHivePartitionTable) { - val readPath = - CarbonTablePath.getSegmentFilesLocation(carbonLoadModel.getTablePath) + - CarbonCommonConstants.FILE_SEPARATOR + carbonLoadModel.getFactTimeStamp + ".tmp" - // Merge all partition files into a single file. - segmentFileName = - mergedLoadNumber + "_" + carbonLoadModel.getFactTimeStamp - val segmentFile = SegmentFileStore - .mergeSegmentFiles(readPath, - segmentFileName, - CarbonTablePath.getSegmentFilesLocation(carbonLoadModel.getTablePath)) - if (segmentFile != null) { - SegmentFileStore - .moveFromTempFolder(segmentFile, - carbonLoadModel.getFactTimeStamp + ".tmp", - carbonLoadModel.getTablePath) + if (mergeIndex) { + val segmentTmpFileName = carbonLoadModel.getFactTimeStamp + CarbonTablePath.SEGMENT_EXT + segmentFileName = mergedLoadNumber + "_" + segmentTmpFileName + val segmentTmpFile = FileFactory.getCarbonFile( + CarbonTablePath.getSegmentFilePath(carbonTable.getTablePath, segmentTmpFileName)) + if (!segmentTmpFile.renameForce( + CarbonTablePath.getSegmentFilePath(carbonTable.getTablePath, segmentFileName))) { + throw new Exception(s"Rename segment file from ${segmentTmpFileName} " + + s"to ${segmentFileName} failed.") + } + val tmpPath = Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
liuhe0702 commented on a change in pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#discussion_r597462970 ########## File path: integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonTableCompactor.scala ########## @@ -296,26 +299,46 @@ class CarbonTableCompactor( if (finalMergeStatus) { val mergedLoadNumber = CarbonDataMergerUtil.getLoadNumberFromLoadName(mergedLoadName) - var segmentFilesForIUDCompact = new util.ArrayList[Segment]() var segmentFileName: String = null + + val isMergeIndexEnabled = CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, + CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT_DEFAULT).toBoolean + + if (compactionType != CompactionType.IUD_DELETE_DELTA && isMergeIndexEnabled) { + MergeIndexUtil.mergeIndexFilesOnCompaction(compactionCallableModel) + } + if (carbonTable.isHivePartitionTable) { - val readPath = - CarbonTablePath.getSegmentFilesLocation(carbonLoadModel.getTablePath) + - CarbonCommonConstants.FILE_SEPARATOR + carbonLoadModel.getFactTimeStamp + ".tmp" - // Merge all partition files into a single file. - segmentFileName = - mergedLoadNumber + "_" + carbonLoadModel.getFactTimeStamp - val segmentFile = SegmentFileStore - .mergeSegmentFiles(readPath, - segmentFileName, - CarbonTablePath.getSegmentFilesLocation(carbonLoadModel.getTablePath)) - if (segmentFile != null) { - SegmentFileStore - .moveFromTempFolder(segmentFile, - carbonLoadModel.getFactTimeStamp + ".tmp", - carbonLoadModel.getTablePath) + if (isMergeIndexEnabled) { + val segmentTmpFileName = carbonLoadModel.getFactTimeStamp + CarbonTablePath.SEGMENT_EXT + segmentFileName = mergedLoadNumber + "_" + segmentTmpFileName + val segmentTmpFile = FileFactory.getCarbonFile( + CarbonTablePath.getSegmentFilePath(carbonTable.getTablePath, segmentTmpFileName)) + if (!segmentTmpFile.renameForce( + CarbonTablePath.getSegmentFilePath(carbonTable.getTablePath, segmentFileName))) { + throw new Exception(s"Rename segment file from ${segmentTmpFileName} " + + s"to ${segmentFileName} failed.") + } + } else { + val readPath = + CarbonTablePath.getSegmentFilesLocation(carbonLoadModel.getTablePath) + + CarbonCommonConstants.FILE_SEPARATOR + carbonLoadModel.getFactTimeStamp + ".tmp" + // Merge all partition files into a single file. + segmentFileName = + mergedLoadNumber + "_" + carbonLoadModel.getFactTimeStamp Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
liuhe0702 commented on a change in pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#discussion_r597463702 ########## File path: integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonTableCompactor.scala ########## @@ -296,26 +299,50 @@ class CarbonTableCompactor( if (finalMergeStatus) { val mergedLoadNumber = CarbonDataMergerUtil.getLoadNumberFromLoadName(mergedLoadName) - var segmentFilesForIUDCompact = new util.ArrayList[Segment]() var segmentFileName: String = null + + val mergeIndex = CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, + CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT_DEFAULT).toBoolean + + if (compactionType != CompactionType.IUD_DELETE_DELTA && mergeIndex) { + MergeIndexUtil.mergeIndexFilesOnCompaction(compactionCallableModel) + } + if (carbonTable.isHivePartitionTable) { - val readPath = - CarbonTablePath.getSegmentFilesLocation(carbonLoadModel.getTablePath) + - CarbonCommonConstants.FILE_SEPARATOR + carbonLoadModel.getFactTimeStamp + ".tmp" - // Merge all partition files into a single file. - segmentFileName = - mergedLoadNumber + "_" + carbonLoadModel.getFactTimeStamp - val segmentFile = SegmentFileStore - .mergeSegmentFiles(readPath, - segmentFileName, - CarbonTablePath.getSegmentFilesLocation(carbonLoadModel.getTablePath)) - if (segmentFile != null) { - SegmentFileStore - .moveFromTempFolder(segmentFile, - carbonLoadModel.getFactTimeStamp + ".tmp", - carbonLoadModel.getTablePath) + if (mergeIndex) { + val segmentTmpFileName = carbonLoadModel.getFactTimeStamp + CarbonTablePath.SEGMENT_EXT + segmentFileName = mergedLoadNumber + "_" + segmentTmpFileName + val segmentTmpFile = FileFactory.getCarbonFile( + CarbonTablePath.getSegmentFilePath(carbonTable.getTablePath, segmentTmpFileName)) + if (!segmentTmpFile.renameForce( + CarbonTablePath.getSegmentFilePath(carbonTable.getTablePath, segmentFileName))) { + throw new Exception(s"Rename segment file from ${segmentTmpFileName} " + + s"to ${segmentFileName} failed.") + } + val tmpPath = Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#discussion_r597487929 ########## File path: processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java ########## @@ -190,7 +200,7 @@ public boolean execute(List<RawResultIterator> unsortedResultIteratorList, LOGGER.error(e.getLocalizedMessage(), e); throw e; } finally { - if (partitionSpec != null) { Review comment: yes, @liuhe0702 instead of making parameter changes or adding constructors, check property here only and can avoid changes in MergerRDD -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
liuhe0702 commented on a change in pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#discussion_r597497963 ########## File path: processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java ########## @@ -190,7 +200,7 @@ public boolean execute(List<RawResultIterator> unsortedResultIteratorList, LOGGER.error(e.getLocalizedMessage(), e); throw e; } finally { - if (partitionSpec != null) { Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#issuecomment-802737738 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12444/job/ApacheCarbonPRBuilder2.3/5603/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#issuecomment-802739486 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12444/job/ApacheCarbon_PR_Builder_2.4.5/3837/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#issuecomment-803623956 Build Failed with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/5059/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#issuecomment-803625714 Build Failed with Spark 2.4.5, Please check CI http://121.244.95.60:12602/job/ApacheCarbon_PR_Builder_2.4.5/3307/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
liuhe0702 commented on pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#issuecomment-803696803 retest this please -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#issuecomment-803720072 Build Failed with Spark 2.3.4, Please check CI http://121.244.95.60:12602/job/ApacheCarbonPRBuilder2.3/5069/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#issuecomment-803722043 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12602/job/ApacheCarbon_PR_Builder_2.4.5/3316/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
liuhe0702 commented on pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#issuecomment-803737267 retest this please -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#issuecomment-803763582 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12602/job/ApacheCarbonPRBuilder2.3/5072/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
akashrn5 commented on a change in pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#discussion_r598429980 ########## File path: processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java ########## @@ -190,7 +190,12 @@ public boolean execute(List<RawResultIterator> unsortedResultIteratorList, LOGGER.error(e.getLocalizedMessage(), e); throw e; } finally { - if (partitionSpec != null) { + boolean isMergeIndex = Boolean.parseBoolean(CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, + CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT_DEFAULT)); + // mergeIndex is true, the segment file not need to be wrotten + // and will be wrotten during merging index + if (partitionSpec != null && !isMergeIndex) { Review comment: `wrotten` to `written` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
CarbonDataQA2 commented on pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#issuecomment-803768137 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12602/job/ApacheCarbon_PR_Builder_2.4.5/3319/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
In reply to this post by GitBox
liuhe0702 commented on a change in pull request #4104: URL: https://github.com/apache/carbondata/pull/4104#discussion_r598454997 ########## File path: processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java ########## @@ -190,7 +190,12 @@ public boolean execute(List<RawResultIterator> unsortedResultIteratorList, LOGGER.error(e.getLocalizedMessage(), e); throw e; } finally { - if (partitionSpec != null) { + boolean isMergeIndex = Boolean.parseBoolean(CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, + CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT_DEFAULT)); + // mergeIndex is true, the segment file not need to be wrotten + // and will be wrotten during merging index + if (partitionSpec != null && !isMergeIndex) { Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
Free forum by Nabble | Edit this page |