Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2882 Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9508/ --- |
In reply to this post by qiuchenjian-2
Github user akashrn5 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2882#discussion_r230340585 --- Diff: tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java --- @@ -453,7 +455,16 @@ private double computePercentage(byte[] data, byte[] min, byte[] max, ColumnSche return dataValue.divide(factorValue).doubleValue(); } double dataValue, minValue, factorValue; - if (column.getDataType() == DataTypes.SHORT) { + if (columnChunk.column.isDimensionColumn() && DataTypeUtil --- End diff -- done --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2882#discussion_r230615332 --- Diff: tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java --- @@ -311,26 +309,39 @@ private void printColumnStats(String columnName) throws IOException, MemoryExcep max = new String(blockletMax, Charset.forName(DEFAULT_CHARSET)); } } else { - minPercent = String.format("%.1f", blocklet.getColumnChunk().getMinPercentage() * 100); - maxPercent = String.format("%.1f", blocklet.getColumnChunk().getMaxPercentage() * 100); + // for complex columns min and max and percentage + if (blocklet.getColumnChunk().column.getColumnName().contains(".val") || + blocklet.getColumnChunk().column.getColumnName().contains(".")) { + minPercent = "NA"; + maxPercent = "NA"; + } else { + minPercent = + String.format("%.1f", Math.abs(blocklet.getColumnChunk().getMinPercentage() * 100)); + maxPercent = + String.format("%.1f", Math.abs(blocklet.getColumnChunk().getMaxPercentage() * 100)); + } DataFile.ColumnChunk columnChunk = blocklet.columnChunk; - if (columnChunk.column.isDimensionColumn() && DataTypeUtil + // need to consider no dictionary complex column + if (columnChunk.column.hasEncoding(Encoding.DICTIONARY) || blocklet + .getColumnChunk().column.getColumnName().contains(".val") || blocklet --- End diff -- Can you add a function in ColumnSchema that returns whether it is a complex column encoded in global dictionary, instead of hard-coding it here? --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2882#discussion_r230615363 --- Diff: examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java --- @@ -61,7 +61,8 @@ public static void main(String[] args) { CarbonWriter writer = CarbonWriter.builder() .outputPath(path) .withLoadOptions(map) - .withCsvInput(new Schema(fields)).build(); + .withCsvInput(new Schema(fields)) + .writtenBy("CarbonReaderExample").build(); --- End diff -- `.build` should also be moved to the next line. Please follow the coding style. --- |
In reply to this post by qiuchenjian-2
Github user akashrn5 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2882#discussion_r230637383 --- Diff: examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java --- @@ -61,7 +61,8 @@ public static void main(String[] args) { CarbonWriter writer = CarbonWriter.builder() .outputPath(path) .withLoadOptions(map) - .withCsvInput(new Schema(fields)).build(); + .withCsvInput(new Schema(fields)) + .writtenBy("CarbonReaderExample").build(); --- End diff -- Done. It was actually like that initially, and I'm using only the Carbon formatting, but I don't know why it formats the code like this; I need to check the XML once. --- |
In reply to this post by qiuchenjian-2
Github user akashrn5 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2882#discussion_r230637603 --- Diff: tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java --- @@ -311,26 +309,39 @@ private void printColumnStats(String columnName) throws IOException, MemoryExcep max = new String(blockletMax, Charset.forName(DEFAULT_CHARSET)); } } else { - minPercent = String.format("%.1f", blocklet.getColumnChunk().getMinPercentage() * 100); - maxPercent = String.format("%.1f", blocklet.getColumnChunk().getMaxPercentage() * 100); + // for complex columns min and max and percentage + if (blocklet.getColumnChunk().column.getColumnName().contains(".val") || + blocklet.getColumnChunk().column.getColumnName().contains(".")) { + minPercent = "NA"; + maxPercent = "NA"; + } else { + minPercent = + String.format("%.1f", Math.abs(blocklet.getColumnChunk().getMinPercentage() * 100)); + maxPercent = + String.format("%.1f", Math.abs(blocklet.getColumnChunk().getMaxPercentage() * 100)); + } DataFile.ColumnChunk columnChunk = blocklet.columnChunk; - if (columnChunk.column.isDimensionColumn() && DataTypeUtil + // need to consider no dictionary complex column + if (columnChunk.column.hasEncoding(Encoding.DICTIONARY) || blocklet + .getColumnChunk().column.getColumnName().contains(".val") || blocklet --- End diff -- We can have a method that tells whether the column is a complex column based on its name; we already have a method that tells this by data type. There is no need to check dictionary include and complex type together, because child columns would again need another method, as we can't give child columns in dictionary. --- |
In reply to this post by qiuchenjian-2
Github user akashrn5 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2882#discussion_r230637639 --- Diff: tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java --- @@ -311,26 +309,39 @@ private void printColumnStats(String columnName) throws IOException, MemoryExcep max = new String(blockletMax, Charset.forName(DEFAULT_CHARSET)); } } else { - minPercent = String.format("%.1f", blocklet.getColumnChunk().getMinPercentage() * 100); - maxPercent = String.format("%.1f", blocklet.getColumnChunk().getMaxPercentage() * 100); + // for complex columns min and max and percentage + if (blocklet.getColumnChunk().column.getColumnName().contains(".val") || + blocklet.getColumnChunk().column.getColumnName().contains(".")) { + minPercent = "NA"; + maxPercent = "NA"; + } else { + minPercent = + String.format("%.1f", Math.abs(blocklet.getColumnChunk().getMinPercentage() * 100)); + maxPercent = + String.format("%.1f", Math.abs(blocklet.getColumnChunk().getMaxPercentage() * 100)); + } DataFile.ColumnChunk columnChunk = blocklet.columnChunk; - if (columnChunk.column.isDimensionColumn() && DataTypeUtil + // need to consider no dictionary complex column + if (columnChunk.column.hasEncoding(Encoding.DICTIONARY) || blocklet + .getColumnChunk().column.getColumnName().contains(".val") || blocklet --- End diff -- handled, please review --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2882 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1261/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2882 Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9525/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2882 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1476/ --- |
In reply to this post by qiuchenjian-2
|
In reply to this post by qiuchenjian-2
|
Free forum by Nabble | Edit this page |