Github user BJangir commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2433#discussion_r200820929 --- Diff: store/sdk/src/test/java/org/apache/carbondata/sdk/file/CSVNonTransactionalCarbonWriterTest.java --- @@ -284,4 +284,137 @@ public void testSchemaPersistence() throws IOException { FileUtils.deleteDirectory(new File(path)); } + @Test + public void testLocalDictionarywithTrue() throws Exception { + String path = "./testWriteFiles"; + FileUtils.deleteDirectory(new File(path)); + + Field[] fields = new Field[3]; + fields[0] = new Field("name", DataTypes.STRING); + fields[1] = new Field("surname", DataTypes.STRING); + fields[2] = new Field("age", DataTypes.INT); + + CarbonWriterBuilder builder = CarbonWriter.builder().isTransactionalTable(false).sortBy(new String[]{"name"}).withBlockSize(12).isLocalDictionaryEnabled(true) + .uniqueIdentifier(System.currentTimeMillis()).taskNo(System.nanoTime()).outputPath(path); + CarbonWriter carbonWriter = builder.buildWriterForCSVInput(new Schema(fields)); + for (int i = 0; i < 100; i++) { + carbonWriter.write(new String[]{"robot" + (i % 10),"robot_surname" + (i % 10), String.valueOf(i)}); + } + carbonWriter.close(); + + File segmentFolder = new File(path); + Assert.assertTrue(segmentFolder.exists()); + + File[] dataFiles = segmentFolder.listFiles(new FileFilter() { + @Override public boolean accept(File pathname) { + return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT); + } + }); + Assert.assertNotNull(dataFiles); + Assert.assertTrue(dataFiles.length > 0); + + + FileUtils.deleteDirectory(new File(path)); + } + + @Test + public void testLocalDictionarywithFalseOption() throws Exception { + String path = "./testWriteFiles"; + FileUtils.deleteDirectory(new File(path)); + + Field[] fields = new Field[3]; + fields[0] = new Field("name", DataTypes.STRING); + fields[1] = new Field("surname", DataTypes.STRING); + fields[2] = new Field("age", DataTypes.INT); + + CarbonWriterBuilder builder = 
CarbonWriter.builder().isTransactionalTable(false).sortBy(new String[]{"name"}).withBlockSize(12).isLocalDictionaryEnabled(false) + .uniqueIdentifier(System.currentTimeMillis()).taskNo(System.nanoTime()).outputPath(path); + CarbonWriter carbonWriter = builder.buildWriterForCSVInput(new Schema(fields)); + for (int i = 0; i < 100; i++) { + carbonWriter.write(new String[]{"robot" + (i % 10),"robot_surname" + (i % 10), String.valueOf(i)}); + } + carbonWriter.close(); + + File segmentFolder = new File(path); + Assert.assertTrue(segmentFolder.exists()); + + File[] dataFiles = segmentFolder.listFiles(new FileFilter() { + @Override public boolean accept(File pathname) { + return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT); + } + }); + Assert.assertNotNull(dataFiles); + Assert.assertTrue(dataFiles.length > 0); + + + FileUtils.deleteDirectory(new File(path)); + } + + @Test + public void testLocalDictionarywithThreshold() throws Exception { --- End diff -- Add Validation whether DataChunk has dictionary or not. --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2433 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5681/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2433 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6915/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2433 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5700/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2433 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5682/ --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2433#discussion_r200998411 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java --- @@ -108,6 +122,20 @@ public TableSchema build() { schema.setTableProperties(property); } + if (isLocalDictionaryEnabled) { + property.put(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE, + String.valueOf(isLocalDictionaryEnabled)); + String localdictionaryThreshold = + localDictionaryThreshold.equalsIgnoreCase("0") ? null : localDictionaryThreshold; + property.put(CarbonCommonConstants.LOCAL_DICTIONARY_THRESHOLD, localdictionaryThreshold); + for (int index = 0; index < allColumns.size(); index++) { + ColumnSchema colSchema = allColumns.get(index); + if (colSchema.getDataType() == DataTypes.STRING) { + colSchema.setLocalDictColumn(true); --- End diff -- @babu now that varchar is also supported, please handle it here as well --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2433 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6968/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2433 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5724/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2433 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5748/ --- |
In reply to this post by qiuchenjian-2
Github user ajantha-bhat commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2433#discussion_r201266103 --- Diff: docs/sdk-guide.md --- @@ -251,6 +251,23 @@ public CarbonWriterBuilder withBlockSize(int blockSize); public CarbonWriterBuilder withBlockletSize(int blockletSize); ``` +``` +/** + * @param enableLocalDictionary enable local dictionary , default is false + * @return updated CarbonWriterBuilder + */ +public CarbonWriterBuilder enableLocalDictionary(boolean enableLocalDictionary); +``` + +``` +/** + * @param localDictionaryThreshold is localDictionaryThreshold,default is 1000 --- End diff -- Please update the documented default to 10000. --- |
In reply to this post by qiuchenjian-2
Github user ajantha-bhat commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2433#discussion_r201266791 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala --- @@ -2291,6 +2295,132 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { |'$writerPath' """.stripMargin) checkAnswer(sql("select * from sdkOutputTable"), Seq(Row(Timestamp.valueOf("1970-01-02 16:00:00"), Row(Timestamp.valueOf("1970-01-02 16:00:00"))))) } + + test("test LocalDictionary with True") { + FileUtils.deleteDirectory(new File(writerPath)) + val builder = CarbonWriter.builder.isTransactionalTable(false) + .sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true) + .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath) + generateCarbonData(builder) + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) + sql("DROP TABLE IF EXISTS sdkTable") + sql( + s"""CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + val descLoc = sql("describe formatted sdkTable").collect + descLoc.find(_.get(0).toString.contains("Local Dictionary Enabled")) match { + case Some(row) => assert(row.get(1).toString.contains("true")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Threshold")) match { + case Some(row) => assert(row.get(1).toString.contains("10000")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Include")) match { + case Some(row) => assert(row.get(1).toString.contains("name,surname")) + } + FileUtils.deleteDirectory(new File(writerPath)) + } + + test("test LocalDictionary with custom Threshold") { + FileUtils.deleteDirectory(new File(writerPath)) + val builder = CarbonWriter.builder.isTransactionalTable(false) + .sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true) + 
.localDictionaryThreshold(200) + .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath) + generateCarbonData(builder) + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) + sql("DROP TABLE IF EXISTS sdkTable") + sql( + s"""CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + val descLoc = sql("describe formatted sdkTable").collect + descLoc.find(_.get(0).toString.contains("Local Dictionary Enabled")) match { + case Some(row) => assert(row.get(1).toString.contains("true")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Threshold")) match { + case Some(row) => assert(row.get(1).toString.contains("10000")) --- End diff -- no need of this check, as this value is not inferred value from carbondata file. --- |
In reply to this post by qiuchenjian-2
Github user ajantha-bhat commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2433#discussion_r201266849 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala --- @@ -2291,6 +2295,132 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { |'$writerPath' """.stripMargin) checkAnswer(sql("select * from sdkOutputTable"), Seq(Row(Timestamp.valueOf("1970-01-02 16:00:00"), Row(Timestamp.valueOf("1970-01-02 16:00:00"))))) } + + test("test LocalDictionary with True") { + FileUtils.deleteDirectory(new File(writerPath)) + val builder = CarbonWriter.builder.isTransactionalTable(false) + .sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true) + .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath) + generateCarbonData(builder) + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) + sql("DROP TABLE IF EXISTS sdkTable") + sql( + s"""CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + val descLoc = sql("describe formatted sdkTable").collect + descLoc.find(_.get(0).toString.contains("Local Dictionary Enabled")) match { + case Some(row) => assert(row.get(1).toString.contains("true")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Threshold")) match { + case Some(row) => assert(row.get(1).toString.contains("10000")) --- End diff -- no need of this check, as this value is not inferred value from carbondata file. --- |
In reply to this post by qiuchenjian-2
Github user ajantha-bhat commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2433#discussion_r201272195 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java --- @@ -284,6 +286,29 @@ public CarbonWriterBuilder withBlockSize(int blockSize) { return this; } + /** + * @param localDictionaryThreshold is localDictionaryThreshold,default is 1000 --- End diff -- Also here, change the documented default to 10000. --- |
In reply to this post by qiuchenjian-2
Github user ajantha-bhat commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2433#discussion_r201273392 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala --- @@ -2291,6 +2295,132 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { |'$writerPath' """.stripMargin) checkAnswer(sql("select * from sdkOutputTable"), Seq(Row(Timestamp.valueOf("1970-01-02 16:00:00"), Row(Timestamp.valueOf("1970-01-02 16:00:00"))))) } + + test("test LocalDictionary with True") { + FileUtils.deleteDirectory(new File(writerPath)) + val builder = CarbonWriter.builder.isTransactionalTable(false) + .sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true) + .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath) + generateCarbonData(builder) + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) + sql("DROP TABLE IF EXISTS sdkTable") + sql( + s"""CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + val descLoc = sql("describe formatted sdkTable").collect + descLoc.find(_.get(0).toString.contains("Local Dictionary Enabled")) match { + case Some(row) => assert(row.get(1).toString.contains("true")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Threshold")) match { + case Some(row) => assert(row.get(1).toString.contains("10000")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Include")) match { + case Some(row) => assert(row.get(1).toString.contains("name,surname")) + } + FileUtils.deleteDirectory(new File(writerPath)) + } + + test("test LocalDictionary with custom Threshold") { + FileUtils.deleteDirectory(new File(writerPath)) + val builder = CarbonWriter.builder.isTransactionalTable(false) + .sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true) + 
.localDictionaryThreshold(200) + .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath) + generateCarbonData(builder) + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) + sql("DROP TABLE IF EXISTS sdkTable") + sql( + s"""CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + val descLoc = sql("describe formatted sdkTable").collect + descLoc.find(_.get(0).toString.contains("Local Dictionary Enabled")) match { + case Some(row) => assert(row.get(1).toString.contains("true")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Threshold")) match { + case Some(row) => assert(row.get(1).toString.contains("10000")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Include")) match { + case Some(row) => assert(row.get(1).toString.contains("name,surname")) + } + FileUtils.deleteDirectory(new File(writerPath)) + } + + test("test Local Dictionary with FallBack") { + FileUtils.deleteDirectory(new File(writerPath)) + val builder = CarbonWriter.builder.isTransactionalTable(false) + .sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true) + .localDictionaryThreshold(5) + .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath) + generateCarbonData(builder) + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(!avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) + sql("DROP TABLE IF EXISTS sdkTable") + sql( + s"""CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + val descLoc = sql("describe formatted sdkTable").collect + descLoc.find(_.get(0).toString.contains("Local Dictionary Enabled")) match { + case Some(row) => assert(row.get(1).toString.contains("true")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Threshold")) match { + case 
Some(row) => assert(row.get(1).toString.contains("10000")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Include")) match { + case Some(row) => assert(row.get(1).toString.contains("name,surname")) + } + FileUtils.deleteDirectory(new File(writerPath)) + } + + test("test local dictionary with External Table data load ") { + FileUtils.deleteDirectory(new File(writerPath)) + val builder = CarbonWriter.builder.isTransactionalTable(false) + .sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true) + .localDictionaryThreshold(200) + .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath) + generateCarbonData(builder) + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) + sql("DROP TABLE IF EXISTS sdkTable") + sql( + s"""CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + FileUtils.deleteDirectory(new File(writerPath)) + sql("insert into sdkTable select 's1','s2',23 ") + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) --- End diff -- rename avroUtil to testUtil, as local dictionary doesn't have anything to do with avro --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2433 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6978/ --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2433#discussion_r200999110 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala --- @@ -293,6 +296,29 @@ class CarbonHelperSqlAstBuilder(conf: SQLConf, table.getFactTable.getTableProperties.put("_external", "true") table.getFactTable.getTableProperties.put("_filelevelformat", "false") } + // setting local dictionary for all string coloumn for external table + var isLocalDic_enabled = table.getFactTable.getTableProperties + .get(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE) + if (null == isLocalDic_enabled) { + table.getFactTable.getTableProperties + .put(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE, + CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE_DEFAULT) + } + isLocalDic_enabled = table.getFactTable.getTableProperties + .get(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE) + if (CarbonScalaUtil.validateLocalDictionaryEnable(isLocalDic_enabled) && + isLocalDic_enabled.toBoolean) { + val allcolumns = table.getFactTable.getListOfColumns + for (i <- 0 until allcolumns.size()) { + val cols = allcolumns.get(i) + if (cols.getDataType == DataTypes.STRING) { --- End diff -- Handle the varchar type here as well. --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2433 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5758/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2433 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5737/ --- |
In reply to this post by qiuchenjian-2
Github user BJangir commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2433#discussion_r201340518 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java --- @@ -284,6 +286,29 @@ public CarbonWriterBuilder withBlockSize(int blockSize) { return this; } + /** + * @param localDictionaryThreshold is localDictionaryThreshold,default is 1000 --- End diff -- Done --- |
In reply to this post by qiuchenjian-2
Github user BJangir commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2433#discussion_r201340590 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala --- @@ -2291,6 +2295,132 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { |'$writerPath' """.stripMargin) checkAnswer(sql("select * from sdkOutputTable"), Seq(Row(Timestamp.valueOf("1970-01-02 16:00:00"), Row(Timestamp.valueOf("1970-01-02 16:00:00"))))) } + + test("test LocalDictionary with True") { + FileUtils.deleteDirectory(new File(writerPath)) + val builder = CarbonWriter.builder.isTransactionalTable(false) + .sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true) + .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath) + generateCarbonData(builder) + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) + sql("DROP TABLE IF EXISTS sdkTable") + sql( + s"""CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + val descLoc = sql("describe formatted sdkTable").collect + descLoc.find(_.get(0).toString.contains("Local Dictionary Enabled")) match { + case Some(row) => assert(row.get(1).toString.contains("true")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Threshold")) match { + case Some(row) => assert(row.get(1).toString.contains("10000")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Include")) match { + case Some(row) => assert(row.get(1).toString.contains("name,surname")) + } + FileUtils.deleteDirectory(new File(writerPath)) + } + + test("test LocalDictionary with custom Threshold") { + FileUtils.deleteDirectory(new File(writerPath)) + val builder = CarbonWriter.builder.isTransactionalTable(false) + .sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true) + 
.localDictionaryThreshold(200) + .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath) + generateCarbonData(builder) + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) + sql("DROP TABLE IF EXISTS sdkTable") + sql( + s"""CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + val descLoc = sql("describe formatted sdkTable").collect + descLoc.find(_.get(0).toString.contains("Local Dictionary Enabled")) match { + case Some(row) => assert(row.get(1).toString.contains("true")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Threshold")) match { + case Some(row) => assert(row.get(1).toString.contains("10000")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Include")) match { + case Some(row) => assert(row.get(1).toString.contains("name,surname")) + } + FileUtils.deleteDirectory(new File(writerPath)) + } + + test("test Local Dictionary with FallBack") { + FileUtils.deleteDirectory(new File(writerPath)) + val builder = CarbonWriter.builder.isTransactionalTable(false) + .sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true) + .localDictionaryThreshold(5) + .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath) + generateCarbonData(builder) + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(!avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) + sql("DROP TABLE IF EXISTS sdkTable") + sql( + s"""CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + val descLoc = sql("describe formatted sdkTable").collect + descLoc.find(_.get(0).toString.contains("Local Dictionary Enabled")) match { + case Some(row) => assert(row.get(1).toString.contains("true")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Threshold")) match { + case 
Some(row) => assert(row.get(1).toString.contains("10000")) + } + descLoc.find(_.get(0).toString.contains("Local Dictionary Include")) match { + case Some(row) => assert(row.get(1).toString.contains("name,surname")) + } + FileUtils.deleteDirectory(new File(writerPath)) + } + + test("test local dictionary with External Table data load ") { + FileUtils.deleteDirectory(new File(writerPath)) + val builder = CarbonWriter.builder.isTransactionalTable(false) + .sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true) + .localDictionaryThreshold(200) + .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath) + generateCarbonData(builder) + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) + sql("DROP TABLE IF EXISTS sdkTable") + sql( + s"""CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + FileUtils.deleteDirectory(new File(writerPath)) + sql("insert into sdkTable select 's1','s2',23 ") + assert(FileFactory.getCarbonFile(writerPath).exists()) + assert(avroUtil.checkForLocalDictionary(avroUtil.getDimRawChunk(0,writerPath))) --- End diff -- Done --- |
Free forum by Nabble | Edit this page |