GitHub user manishgupta88 opened a pull request:
https://github.com/apache/carbondata/pull/1782 [WIP] Changes for creating carbon index merge file from old store which did not contain the blocklet info in index information Changes for creating carbon index merge file from old store which did not contain the blocklet info in index information - [ ] Any interfaces changed? - [ ] Any backward compatibility impacted? - [ ] Document update required? - [ ] Testing done - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. You can merge this pull request into a Git repository by running: $ git pull https://github.com/manishgupta88/carbondata enhace_segment_index_compaction Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1782.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1782 ---- commit 5f4a75f7395c8949a5ccf53030ebdcba5411022f Author: manishgupta88 <tomanishgupta18@...> Date: 2018-01-09T15:02:36Z changes for creating carbon index merge file from old store which did not contain the blocklet info in index information ---- --- |
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1782 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/2665/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1782 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/2806/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1782 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/1429/ --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on the issue:
https://github.com/apache/carbondata/pull/1782 retest sdv please --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1782 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/2807/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1782 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/1449/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1782 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/2683/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1782 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/2820/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1782#discussion_r160908770 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala --- @@ -220,11 +220,14 @@ case class CarbonAlterTableCompactionCommand( try { if (compactionType == CompactionType.SEGMENT_INDEX) { // Just launch job to merge index and return - CommonUtil.mergeIndexFiles(sqlContext.sparkContext, + CommonUtil.mergeIndexFiles( + sqlContext.sparkContext, CarbonDataMergerUtil.getValidSegmentList( carbonTable.getAbsoluteTableIdentifier).asScala, carbonLoadModel.getTablePath, - carbonTable, true) + carbonTable, + true, + true) --- End diff -- provide name to parameters --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1782#discussion_r160909240 --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala --- @@ -893,33 +893,53 @@ object CommonUtil { /** * Merge the carbonindex files with in the segment to carbonindexmerge file inside same segment + * + * @param sparkContext + * @param segmentIds + * @param tablePath + * @param carbonTable + * @param mergeIndexProperty + * @param readFileFooterFromCarbonDataFile flag to read file footer information from carbondata + * file. This will used in case of upgrade from version + * which do not store the blocklet info to current version */ def mergeIndexFiles(sparkContext: SparkContext, segmentIds: Seq[String], tablePath: String, carbonTable: CarbonTable, - mergeIndexProperty: Boolean): Unit = { + mergeIndexProperty: Boolean, + readFileFooterFromCarbonDataFile: Boolean = false): Unit = { if (mergeIndexProperty) { - new CarbonMergeFilesRDD(sparkContext, AbsoluteTableIdentifier.from(tablePath, - carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, - segmentIds).collect() + new CarbonMergeFilesRDD( + sparkContext, + AbsoluteTableIdentifier + .from(tablePath, carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, + segmentIds, + readFileFooterFromCarbonDataFile).collect() --- End diff -- Use like this ``` new CarbonMergeFilesRDD( sparkContext, AbsoluteTableIdentifier.from( tablePath, carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, segmentIds, readFileFooterFromCarbonDataFile).collect() ``` --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1782#discussion_r160909350 --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala --- @@ -893,33 +893,53 @@ object CommonUtil { /** * Merge the carbonindex files with in the segment to carbonindexmerge file inside same segment + * + * @param sparkContext + * @param segmentIds + * @param tablePath + * @param carbonTable + * @param mergeIndexProperty + * @param readFileFooterFromCarbonDataFile flag to read file footer information from carbondata + * file. This will used in case of upgrade from version + * which do not store the blocklet info to current version */ def mergeIndexFiles(sparkContext: SparkContext, segmentIds: Seq[String], tablePath: String, carbonTable: CarbonTable, - mergeIndexProperty: Boolean): Unit = { + mergeIndexProperty: Boolean, + readFileFooterFromCarbonDataFile: Boolean = false): Unit = { if (mergeIndexProperty) { - new CarbonMergeFilesRDD(sparkContext, AbsoluteTableIdentifier.from(tablePath, - carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, - segmentIds).collect() + new CarbonMergeFilesRDD( + sparkContext, + AbsoluteTableIdentifier + .from(tablePath, carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, + segmentIds, + readFileFooterFromCarbonDataFile).collect() } else { try { CarbonProperties.getInstance() .getProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT).toBoolean if (CarbonProperties.getInstance().getProperty( CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT_DEFAULT).toBoolean) { - new CarbonMergeFilesRDD(sparkContext, AbsoluteTableIdentifier.from(tablePath, - carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, - segmentIds).collect() + new CarbonMergeFilesRDD( + sparkContext, + AbsoluteTableIdentifier + .from(tablePath, carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, --- End diff -- Please use like this ``` new 
CarbonMergeFilesRDD( sparkContext, AbsoluteTableIdentifier.from( tablePath, carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, segmentIds, readFileFooterFromCarbonDataFile).collect() ``` --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1782#discussion_r160909442 --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala --- @@ -893,33 +893,53 @@ object CommonUtil { /** * Merge the carbonindex files with in the segment to carbonindexmerge file inside same segment + * + * @param sparkContext + * @param segmentIds + * @param tablePath + * @param carbonTable + * @param mergeIndexProperty + * @param readFileFooterFromCarbonDataFile flag to read file footer information from carbondata + * file. This will used in case of upgrade from version + * which do not store the blocklet info to current version */ def mergeIndexFiles(sparkContext: SparkContext, segmentIds: Seq[String], tablePath: String, carbonTable: CarbonTable, - mergeIndexProperty: Boolean): Unit = { + mergeIndexProperty: Boolean, + readFileFooterFromCarbonDataFile: Boolean = false): Unit = { if (mergeIndexProperty) { - new CarbonMergeFilesRDD(sparkContext, AbsoluteTableIdentifier.from(tablePath, - carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, - segmentIds).collect() + new CarbonMergeFilesRDD( + sparkContext, + AbsoluteTableIdentifier + .from(tablePath, carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, + segmentIds, + readFileFooterFromCarbonDataFile).collect() } else { try { CarbonProperties.getInstance() .getProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT).toBoolean if (CarbonProperties.getInstance().getProperty( CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT_DEFAULT).toBoolean) { - new CarbonMergeFilesRDD(sparkContext, AbsoluteTableIdentifier.from(tablePath, - carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, - segmentIds).collect() + new CarbonMergeFilesRDD( + sparkContext, + AbsoluteTableIdentifier + .from(tablePath, carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, --- End diff -- change also at line 936 --- |
In reply to this post by qiuchenjian-2
Github user manishgupta88 commented on the issue:
https://github.com/apache/carbondata/pull/1782 @ravipesala ..handled review comments...kindly review and merge --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1782 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/2716/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1782 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/1483/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1782 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/2843/ --- |
In reply to this post by qiuchenjian-2
Github user mohammadshahidkhan commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1782#discussion_r161366867 --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala --- @@ -893,33 +893,58 @@ object CommonUtil { /** * Merge the carbonindex files with in the segment to carbonindexmerge file inside same segment + * + * @param sparkContext + * @param segmentIds + * @param tablePath + * @param carbonTable + * @param mergeIndexProperty + * @param readFileFooterFromCarbonDataFile flag to read file footer information from carbondata + * file. This will used in case of upgrade from version + * which do not store the blocklet info to current version */ def mergeIndexFiles(sparkContext: SparkContext, segmentIds: Seq[String], tablePath: String, carbonTable: CarbonTable, - mergeIndexProperty: Boolean): Unit = { + mergeIndexProperty: Boolean, + readFileFooterFromCarbonDataFile: Boolean = false): Unit = { if (mergeIndexProperty) { - new CarbonMergeFilesRDD(sparkContext, AbsoluteTableIdentifier.from(tablePath, - carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, - segmentIds).collect() + new CarbonMergeFilesRDD( + sparkContext, + AbsoluteTableIdentifier.from( + tablePath, + carbonTable.getDatabaseName, + carbonTable.getTableName).getTablePath, --- End diff -- If we already have the tablePath, why get it again via AbsoluteTableIdentifier.from( tablePath, carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath? This unnecessarily creates objects, so please simply use the tablePath directly. --- |
In reply to this post by qiuchenjian-2
Github user mohammadshahidkhan commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1782#discussion_r161367075 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java --- @@ -185,4 +219,98 @@ private MergedBlockIndex readMergeBlockIndex(ThriftReader thriftReader) throws I public Map<String, byte[]> getCarbonIndexMap() { return carbonIndexMap; } + + /** + * This method will read the index information from carbon index file + * + * @param indexFile + * @return + * @throws IOException + */ + private void readIndexAndFillBlockletInfo(CarbonFile indexFile) throws IOException { + // flag to take decision whether carbondata file footer reading is required. + // If the index file does not contain the file footer then carbondata file footer + // read is required else not required + boolean isCarbonDataFileFooterReadRequired = true; + List<BlockletInfo> blockletInfoList = null; + List<BlockIndex> blockIndexThrift = + new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); + CarbonIndexFileReader indexReader = new CarbonIndexFileReader(); + try { + indexReader.openThriftReader(indexFile.getCanonicalPath()); + // get the index header + org.apache.carbondata.format.IndexHeader indexHeader = indexReader.readIndexHeader(); + DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter(); + String filePath = indexFile.getCanonicalPath(); + String parentPath = filePath.substring(0, filePath.lastIndexOf(File.separator)); --- End diff -- I think it is better to use CarbonCommonConstants.FILE_SEPARATOR instead of File.separator. --- |
In reply to this post by qiuchenjian-2
Github user mohammadshahidkhan commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1782#discussion_r161366900 --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala --- @@ -893,33 +893,58 @@ object CommonUtil { /** * Merge the carbonindex files with in the segment to carbonindexmerge file inside same segment + * + * @param sparkContext + * @param segmentIds + * @param tablePath + * @param carbonTable + * @param mergeIndexProperty + * @param readFileFooterFromCarbonDataFile flag to read file footer information from carbondata + * file. This will used in case of upgrade from version + * which do not store the blocklet info to current version */ def mergeIndexFiles(sparkContext: SparkContext, segmentIds: Seq[String], tablePath: String, carbonTable: CarbonTable, - mergeIndexProperty: Boolean): Unit = { + mergeIndexProperty: Boolean, + readFileFooterFromCarbonDataFile: Boolean = false): Unit = { if (mergeIndexProperty) { - new CarbonMergeFilesRDD(sparkContext, AbsoluteTableIdentifier.from(tablePath, - carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, - segmentIds).collect() + new CarbonMergeFilesRDD( + sparkContext, + AbsoluteTableIdentifier.from( + tablePath, + carbonTable.getDatabaseName, + carbonTable.getTableName).getTablePath, + segmentIds, + readFileFooterFromCarbonDataFile).collect() } else { try { CarbonProperties.getInstance() .getProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT).toBoolean if (CarbonProperties.getInstance().getProperty( CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT_DEFAULT).toBoolean) { - new CarbonMergeFilesRDD(sparkContext, AbsoluteTableIdentifier.from(tablePath, - carbonTable.getDatabaseName, carbonTable.getTableName).getTablePath, - segmentIds).collect() + new CarbonMergeFilesRDD( + sparkContext, + AbsoluteTableIdentifier.from( --- End diff -- Same here as well; please take care of this in the other places in this method too. --- |
Free forum by Nabble | Edit this page |