Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2482

SDV Build Fail, Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5867/

Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2482

LGTM

Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2482

SDV Build Success, Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5868/

Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2482#discussion_r237739793

--- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/CarbonIndexFileMergeTestCase.scala ---
@@ -215,43 +249,215 @@ class CarbonIndexFileMergeTestCase
     Assert
       .assertEquals(getIndexOrMergeIndexFileSize(table, "0", CarbonTablePath.INDEX_FILE_EXT),
         segment0.head.getIndexSize.toLong)
-    new CarbonIndexFileMergeWriter(table)
-      .mergeCarbonIndexFilesOfSegment("0", table.getTablePath, false, String.valueOf(System.currentTimeMillis()))
+    sql("Alter table fileSize compact 'segment_index'")
     loadMetadataDetails = SegmentStatusManager
       .readTableStatusFile(CarbonTablePath.getTableStatusFilePath(table.getTablePath))
     segment0 = loadMetadataDetails.filter(x=> x.getLoadName.equalsIgnoreCase("0"))
     Assert
       .assertEquals(getIndexOrMergeIndexFileSize(table, "0", CarbonTablePath.MERGE_INDEX_FILE_EXT),
         segment0.head.getIndexSize.toLong)
+    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, "true")
     sql("DROP TABLE IF EXISTS fileSize")
   }

-  private def getIndexFileCount(tableName: String, segmentNo: String): Int = {
-    val carbonTable = CarbonMetadata.getInstance().getCarbonTable(tableName)
-    val segmentDir = CarbonTablePath.getSegmentPath(carbonTable.getTablePath, segmentNo)
-    if (FileFactory.isFileExist(segmentDir)) {
-      val indexFiles = new SegmentIndexFileStore().getIndexFilesFromSegment(segmentDir)
-      indexFiles.asScala.map { f =>
-        if (f._2 == null) {
-          1
-        } else {
-          0
-        }
-      }.sum
-    } else {
-      val segment = Segment.getSegment(segmentNo, carbonTable.getTablePath)
-      if (segment != null) {
-        val store = new SegmentFileStore(carbonTable.getTablePath, segment.getSegmentFileName)
-        store.getSegmentFile.getLocationMap.values().asScala.map { f =>
-          if (f.getMergeFileName == null) {
-            f.getFiles.size()
-          } else {
-            0
-          }
-        }.sum
-      } else {
-        0
+  test("Verify index merge for compacted segments MINOR - level 2") {
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD, "2,2")
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, "false")
+    sql("DROP TABLE IF EXISTS nonindexmerge")
+    sql(
+      """
+        | CREATE TABLE nonindexmerge(id INT, name STRING, city STRING, age INT)
+        | STORED BY 'org.apache.carbondata.format'
+        | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='GLOBAL_SORT')
+      """.stripMargin)
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')")
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')")
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')")
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')")
+    val rows = sql("""Select count(*) from nonindexmerge""").collect()
+    assert(getIndexFileCount("default_nonindexmerge", "0") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "1") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "2") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "3") == 100)
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, "true")
+    sql("ALTER TABLE nonindexmerge COMPACT 'minor'").collect()
+    assert(getIndexFileCount("default_nonindexmerge", "0") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "1") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "2") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "3") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "0.1") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "2.1") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "0.2") == 0)
+    checkAnswer(sql("""Select count(*) from nonindexmerge"""), rows)
+  }
+
+  test("Verify index merge for compacted segments Auto Compaction") {
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD, "2,3")
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, "false")
+    sql("DROP TABLE IF EXISTS nonindexmerge")
+    sql(
+      """
+        | CREATE TABLE nonindexmerge(id INT, name STRING, city STRING, age INT)
+        | STORED BY 'org.apache.carbondata.format'
+        | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='GLOBAL_SORT')
+      """.stripMargin)
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')")
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')")
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')")
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')")
+    val rows = sql("""Select count(*) from nonindexmerge""").collect()
+    assert(getIndexFileCount("default_nonindexmerge", "0") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "1") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "2") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "3") == 100)
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, "true")
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE, "true")
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')"
+    )
+    assert(getIndexFileCount("default_nonindexmerge", "0") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "1") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "2") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "3") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "4") == 0)
+    assert(getIndexFileCount("default_nonindexmerge", "0.1") == 0)
+    assert(getIndexFileCount("default_nonindexmerge", "2.1") == 0)
+    checkAnswer(sql("""Select count(*) from nonindexmerge"""), Seq(Row(3000000)))
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE, "false")
+  }
+
+  test("Verify index merge for compacted segments Auto Compaction - level 2") {
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD, "2,2")
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, "false")
+    sql("DROP TABLE IF EXISTS nonindexmerge")
+    sql(
+      """
+        | CREATE TABLE nonindexmerge(id INT, name STRING, city STRING, age INT)
+        | STORED BY 'org.apache.carbondata.format'
+        | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='GLOBAL_SORT')
+      """.stripMargin)
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')")
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')")
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')")
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE nonindexmerge OPTIONS('header'='false', " +
+        s"'GLOBAL_SORT_PARTITIONS'='100')")
+    val rows = sql("""Select count(*) from nonindexmerge""").collect()
+    assert(getIndexFileCount("default_nonindexmerge", "0") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "1") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "2") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "3") == 100)
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_MERGE_INDEX_IN_SEGMENT, "true")
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE, "true")
--- End diff --

Why is the key CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE?
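
For context, here is a minimal sketch of the fix this review comment appears to be asking for. It assumes, going by the naming convention, that CarbonCommonConstants.ENABLE_AUTO_LOAD_MERGE is the property key and DEFAULT_ENABLE_AUTO_LOAD_MERGE is only its default value; the suggested lines and the restore step are illustrative, not taken from the patch:

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties

// Current test code: the default-value constant is passed where a property key
// is expected, so the auto-load-merge setting is never actually applied.
CarbonProperties.getInstance()
  .addProperty(CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE, "true")

// Suggested shape (assumed constant name): set the property via its key ...
CarbonProperties.getInstance()
  .addProperty(CarbonCommonConstants.ENABLE_AUTO_LOAD_MERGE, "true")

// ... and restore the default at the end of the test so later tests are unaffected.
CarbonProperties.getInstance()
  .addProperty(CarbonCommonConstants.ENABLE_AUTO_LOAD_MERGE,
    CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE)
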
Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2482#discussion_r237740112

--- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/CarbonIndexFileMergeTestCase.scala ---
@@ -215,43 +249,215 @@ class CarbonIndexFileMergeTestCase
[Quotes the same diff hunk as the previous comment, anchored on the lines below in the "Verify index merge for compacted segments Auto Compaction" test.]
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE, "true")
--- End diff --

Why is the key CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE? DEFAULT_ENABLE_AUTO_LOAD_MERGE should be the value.
Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2482#discussion_r237740134

--- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/CarbonIndexFileMergeTestCase.scala ---
@@ -215,43 +249,215 @@ class CarbonIndexFileMergeTestCase
[Quotes the same diff hunk, anchored on the lines below that restore the property at the end of the "Verify index merge for compacted segments Auto Compaction" test.]
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE, "false")
--- End diff --

Why is the key CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE? DEFAULT_ENABLE_AUTO_LOAD_MERGE should be the value.
Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2482#discussion_r237740164

--- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/CarbonIndexFileMergeTestCase.scala ---
@@ -215,43 +249,215 @@ class CarbonIndexFileMergeTestCase
[Quotes the same diff hunk, anchored on the lines below in the "Verify index merge for compacted segments Auto Compaction - level 2" test.]
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE, "true")
--- End diff --

Why is the key CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE? DEFAULT_ENABLE_AUTO_LOAD_MERGE should be the value.