Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2209 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/4358/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2209 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/5522/ --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184916011 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java --- @@ -53,6 +55,21 @@ public PrimitiveQueryType(String name, String parentname, int blockIndex, this.name = name; this.parentname = parentname; this.isDirectDictionary = isDirectDictionary; + this.isDictionary = true; + } + + + public PrimitiveQueryType(String name, String parentname, int blockIndex, --- End diff -- One more extra constructor is not required. If dictionary is not null then isDictionary=true --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184916437 --- Diff: processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java --- @@ -171,9 +175,9 @@ public void fillCardinality(List<Integer> dimCardWithComplex) { /** * parse byte array and bit pack */ - @Override - public void parseAndBitPack(ByteBuffer byteArrayInput, DataOutputStream dataOutputStream, - KeyGenerator[] generator) throws IOException, KeyGenException { + @Override public void parseAndBitPack(ByteBuffer byteArrayInput, + DataOutputStream dataOutputStream, KeyGenerator[] generator) + throws IOException, KeyGenException { int dataLength = byteArrayInput.getInt(); --- End diff -- remove extra space --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184916441 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java --- @@ -43,6 +43,9 @@ */ private static final long serialVersionUID = 7676766554874863763L; + public void columnSchema() { --- End diff -- What is the use of this method --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184916594 --- Diff: processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java --- @@ -171,9 +175,9 @@ public void fillCardinality(List<Integer> dimCardWithComplex) { /** * parse byte array and bit pack */ - @Override - public void parseAndBitPack(ByteBuffer byteArrayInput, DataOutputStream dataOutputStream, - KeyGenerator[] generator) throws IOException, KeyGenException { + @Override public void parseAndBitPack(ByteBuffer byteArrayInput, --- End diff -- Please remove this method from interface as below new method will be used --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184916731 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedResultCollector.java --- @@ -134,7 +134,13 @@ void fillDimensionData(BlockletScannedResult scannedResult, int[] surrogateResul row[order[i]] = DataTypeUtil.getDataBasedOnDataType(scannedResult.getBlockletId(), DataTypes.STRING); } - } else { + } else if (complexDataTypeArray[i]) { + // Complex Type With No Dictionary Encoding. + row[order[i]] = comlexDimensionInfoMap.get(queryDimensions[i].getDimension().getOrdinal()) + .getDataBasedOnDataTypeFromNoDictionary( + ByteBuffer.wrap(complexTypeKeyArray[complexTypeColumnIndex++])); + } else + { --- End diff -- Move the braces up --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184916825 --- Diff: processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java --- @@ -183,7 +187,27 @@ public void parseAndBitPack(ByteBuffer byteArrayInput, DataOutputStream dataOutp for (int i = 0; i < dataLength; i++) { children.parseAndBitPack(byteArrayInput, dataOutputStream, generator); } + } + + @Override + public int parseComplexValue(ByteBuffer byteArrayInput, DataOutputStream dataOutputStream, + KeyGenerator[] generator, Boolean[][] complexDictionaryIndentification, int startOffset) --- End diff -- complexDictionaryIndentification is not required as each primitive children knows it is not of dictionary type or no dictionary type --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184916986 --- Diff: processing/src/main/java/org/apache/carbondata/processing/datatypes/GenericDataType.java --- @@ -71,6 +71,8 @@ void writeByteArray(T input, DataOutputStream dataOutputStream) */ void setSurrogateIndex(int surrIndex); + Boolean getIsColumnDictionary(); --- End diff -- change this to boolean isDictionaryEncoded() --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184917041 --- Diff: processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java --- @@ -94,6 +99,11 @@ private CarbonDimension carbonDimension; + private Boolean isDictionary; --- End diff -- change this to boolean isDictionary --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184917127 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java --- @@ -53,6 +55,21 @@ public PrimitiveQueryType(String name, String parentname, int blockIndex, this.name = name; this.parentname = parentname; this.isDirectDictionary = isDirectDictionary; + this.isDictionary = true; + } + + + public PrimitiveQueryType(String name, String parentname, int blockIndex, + org.apache.carbondata.core.metadata.datatype.DataType dataType, int keySize, + Dictionary dictionary, boolean isDirectDictionary, boolean isDictionary) { --- End diff -- I don't think it is required to have a separate constructor, if (dictionary == null && !isDirectDictionary) then it becomes nodictionary --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184917500 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java --- @@ -84,6 +101,9 @@ public PrimitiveQueryType(String name, String parentname, int blockIndex, DimensionRawColumnChunk[] rawColumnChunks, int rowNumber, int pageNumber, DataOutputStream dataOutputStream) throws IOException { byte[] currentVal = copyBlockDataChunk(rawColumnChunks, rowNumber, pageNumber); + if (!this.isDictionary) { + dataOutputStream.writeInt(currentVal.length); --- End diff -- Just writing length? where are you writing data? --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184917713 --- Diff: processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java --- @@ -94,6 +99,11 @@ private CarbonDimension carbonDimension; + private Boolean isDictionary; + + private FieldConverter fieldConverterForNoDictionary; --- End diff -- FieldConverter is not required in this class, as caller will create for primitive type like existing flow..same way we can handle for no dictionary --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184917833 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java --- @@ -108,4 +128,14 @@ public PrimitiveQueryType(String name, String parentname, int blockIndex, } return actualData; } + + @Override public Object getDataBasedOnDataTypeFromNoDictionary(ByteBuffer data) { --- End diff -- I don't think it is required to have one more method, just rename and handle in same method `getDataBasedOnDataTypeFromSurrogates` , it can avoid duplicate code and extra handling --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184918097 --- Diff: processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java --- @@ -211,29 +241,66 @@ public int getSurrogateIndex() { /* * set surrogate index */ - @Override - public void setSurrogateIndex(int surrIndex) { - index = surrIndex; + @Override public void setSurrogateIndex(int surrIndex) { + if (this.carbonDimension != null && !this.carbonDimension.hasEncoding(Encoding.DICTIONARY)) { + index = 0; + } else if (this.carbonDimension == null && isDictionary == false) { + index = 0; + } else { + index = surrIndex; + } + } + + @Override public Boolean getIsColumnDictionary() { + return isDictionary; } @Override public void writeByteArray(Object input, DataOutputStream dataOutputStream) throws IOException, DictionaryGenerationException { String parsedValue = input == null ? null : DataTypeUtil.parseValue(input.toString(), carbonDimension); - Integer surrogateKey; - if (null == parsedValue) { - surrogateKey = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY; - } else { - surrogateKey = dictionaryGenerator.getOrGenerateKey(parsedValue); - if (surrogateKey == CarbonCommonConstants.INVALID_SURROGATE_KEY) { + if (this.isDictionary) { + Integer surrogateKey; + if (null == parsedValue) { surrogateKey = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY; + } else { + surrogateKey = dictionaryGenerator.getOrGenerateKey(parsedValue); + if (surrogateKey == CarbonCommonConstants.INVALID_SURROGATE_KEY) { + surrogateKey = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY; + } + } + dataOutputStream.writeInt(surrogateKey); + } else { + // Transform into ByteArray for No Dictionary. + // TODO have to refactor and place all the cases present in NonDictionaryFieldConverterImpl + if (null == parsedValue && this.carbonDimension.getDataType() != DataTypes.STRING) { --- End diff -- null value updation is also required for String data type second condition in If check is not correct --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184918157 --- Diff: integration/spark2/src/main/scala/org/apache/spark/util/SparkTypeConverter.scala --- @@ -97,16 +97,30 @@ private[spark] object SparkTypeConverter { def getStructChildren(table: CarbonTable, dimName: String): String = { table.getChildren(dimName).asScala.map(childDim => { childDim.getDataType.getName.toLowerCase match { - case "array" => s"${ + case "array" => if (table.isTransactionalTable) {s"${ --- End diff -- Could explain in comments why this many checks are required for non transactional table? Better write another method rather than keeping so many if checks --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184918197 --- Diff: integration/spark2/src/main/scala/org/apache/spark/util/SparkTypeConverter.scala --- @@ -123,13 +137,29 @@ private[spark] object SparkTypeConverter { private def recursiveMethod( table: CarbonTable, dimName: String, childDim: CarbonDimension) = { childDim.getDataType.getName.toLowerCase match { - case "array" => s"${ - childDim.getColName.substring(dimName.length + 1) - }:array<${ getArrayChildren(table, childDim.getColName) }>" - case "struct" => s"${ - childDim.getColName.substring(dimName.length + 1) - }:struct<${ getStructChildren(table, childDim.getColName) }>" - case dType => s"${ childDim.getColName.substring(dimName.length + 1) }:${ dType }" + case "array" => if (table.isTransactionalTable) { --- End diff -- Better write another method rather than keeping so many if checks --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184918548 --- Diff: processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java --- @@ -171,9 +175,9 @@ public void fillCardinality(List<Integer> dimCardWithComplex) { /** * parse byte array and bit pack */ - @Override - public void parseAndBitPack(ByteBuffer byteArrayInput, DataOutputStream dataOutputStream, - KeyGenerator[] generator) throws IOException, KeyGenException { + @Override public void parseAndBitPack(ByteBuffer byteArrayInput, --- End diff -- Now this method is not used? if not delete it --- |
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184918635 --- Diff: processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java --- @@ -211,29 +241,66 @@ public int getSurrogateIndex() { /* * set surrogate index */ - @Override - public void setSurrogateIndex(int surrIndex) { - index = surrIndex; + @Override public void setSurrogateIndex(int surrIndex) { + if (this.carbonDimension != null && !this.carbonDimension.hasEncoding(Encoding.DICTIONARY)) { + index = 0; + } else if (this.carbonDimension == null && isDictionary == false) { + index = 0; + } else { + index = surrIndex; + } + } + + @Override public Boolean getIsColumnDictionary() { + return isDictionary; } @Override public void writeByteArray(Object input, DataOutputStream dataOutputStream) throws IOException, DictionaryGenerationException { String parsedValue = input == null ? null : DataTypeUtil.parseValue(input.toString(), carbonDimension); - Integer surrogateKey; - if (null == parsedValue) { - surrogateKey = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY; - } else { - surrogateKey = dictionaryGenerator.getOrGenerateKey(parsedValue); - if (surrogateKey == CarbonCommonConstants.INVALID_SURROGATE_KEY) { + if (this.isDictionary) { + Integer surrogateKey; + if (null == parsedValue) { surrogateKey = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY; + } else { + surrogateKey = dictionaryGenerator.getOrGenerateKey(parsedValue); + if (surrogateKey == CarbonCommonConstants.INVALID_SURROGATE_KEY) { + surrogateKey = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY; + } + } + dataOutputStream.writeInt(surrogateKey); + } else { + // Transform into ByteArray for No Dictionary. + // TODO have to refactor and place all the cases present in NonDictionaryFieldConverterImpl + if (null == parsedValue && this.carbonDimension.getDataType() != DataTypes.STRING) { + updateNullValue(dataOutputStream); + } else { + byte[] value = DataTypeUtil.getBytesBasedOnDataTypeForNoDictionaryColumn(parsedValue, --- End diff -- In case of no dictionary primitive type column NoDictionary field converter will convert and give the parsed value so no need to call DataTypeUtil.getBytesBasedOnDataTypeForNoDictionaryColumn again, dictionary write this value in LV format to outputstream --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2209#discussion_r184918720 --- Diff: processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java --- @@ -183,7 +187,27 @@ public void parseAndBitPack(ByteBuffer byteArrayInput, DataOutputStream dataOutp for (int i = 0; i < dataLength; i++) { children.parseAndBitPack(byteArrayInput, dataOutputStream, generator); } + } + + @Override + public int parseComplexValue(ByteBuffer byteArrayInput, DataOutputStream dataOutputStream, + KeyGenerator[] generator, Boolean[][] complexDictionaryIndentification, int startOffset) + throws IOException, KeyGenException { + int dataLength = byteArrayInput.getInt(); + startOffset += Integer.SIZE / Byte.SIZE; --- End diff -- No need to calculate every time , just use `4` and add comment --- |
Free forum by Nabble | Edit this page |