Apache CarbonData Dev Mailing List archive › Apache CarbonData JIRA issues

[GitHub] carbondata pull request #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns...

Classic

List

67 messages Options

Options

1234

[GitHub] carbondata pull request #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns...

Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2261#discussion_r186057833

--- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala ---
@@ -750,7 +750,93 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
buildAvroTestData(3, null)
}

- test("Read sdk writer Avro output ") {
+ def buildAvroTestDataArrayType(rows: Int, options: util.Map[String, String]): Any = {
+ FileUtils.deleteDirectory(new File(writerPath))
+ /**
+ * *
+ * {
+ * "name": "address",
+ * "type": "record",
+ * "fields": [
+ * {
+ * "name": "name",
+ * "type": "string"
+ * },
+ * {
+ * "name": "age",
+ * "type": "int"
+ * },
+ * {
+ * "name": "address",
+ * "type": {
+ * "type": "array",
+ * "items": {
+ * "name": "street",
+ * "type": "string"
+ * }
+ * }
+ * }
+ * ]
+ * }
+ **/
+ val mySchema = "{\n" + "\t\"name\": \"address\",\n" + "\t\"type\": \"record\",\n" +
+ "\t\"fields\": [\n" + "\t\t{\n" + "\t\t\t\"name\": \"name\",\n" +
+ "\t\t\t\"type\": \"string\"\n" + "\t\t},\n" + "\t\t{\n" +
+ "\t\t\t\"name\": \"age\",\n" + "\t\t\t\"type\": \"int\"\n" + "\t\t},\n" +
+ "\t\t{\n" + "\t\t\t\"name\": \"address\",\n" + "\t\t\t\"type\": {\n" +
+ "\t\t\t\t\"type\": \"array\",\n" + "\t\t\t\t\"items\": {\n" +
+ "\t\t\t\t\t\"name\": \"street\",\n" +
+ "\t\t\t\t\t\"type\": \"string\"\n" + "\t\t\t\t}\n" + "\t\t\t}\n" +
+ "\t\t}\n" + "\t]\n" + "}"
+ /**
+ * {
+ * "name": "bob",
+ * "age": 10,
+ * "address": [
+ * "abc", "def"
+ * ]
+ * }
+ **/
+ val json: String = "{\n" + "\t\"name\": \"bob\",\n" + "\t\"age\": 10,\n" +
+ "\t\"address\": [\n" + "\t\t\"abc\", \"defg\"\n" + "\t]\n" + "}"
+ // conversion to GenericData.Record
+ val nn = new org.apache.avro.Schema.Parser().parse(mySchema)
+ val converter = new JsonAvroConverter
+ val record = converter
+ .convertToGenericDataRecord(json.getBytes(CharEncoding.UTF_8), nn)
+ val fields = new Array[Field](3)
+ fields(0) = new Field("name", DataTypes.STRING)
+ fields(1) = new Field("age", DataTypes.INT)
+ // fields[1] = new Field("age", DataTypes.INT);
--- End diff --

Done

---

[GitHub] carbondata pull request #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns...

In reply to this post by qiuchenjian-2

Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2261#discussion_r186057854

--- Diff: core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java ---
@@ -135,25 +157,41 @@ public ColumnSchema addColumn(StructField field, boolean isSortColumn) {
newColumn.setColumnReferenceId(newColumn.getColumnUniqueId());
newColumn.setEncodingList(createEncoding(field.getDataType(), isSortColumn));
if (field.getDataType().isComplexType()) {
- newColumn.setNumberOfChild(((StructType) field.getDataType()).getFields().size());
+ if (field.getDataType().getName().equalsIgnoreCase("ARRAY")) {
+ newColumn.setNumberOfChild(1);
+ } else {
+ newColumn.setNumberOfChild(((StructType) field.getDataType()).getFields().size());
+ }
}
if (DataTypes.isDecimal(field.getDataType())) {
DecimalType decimalType = (DecimalType) field.getDataType();
newColumn.setPrecision(decimalType.getPrecision());
newColumn.setScale(decimalType.getScale());
}
if (!isSortColumn) {
- otherColumns.add(newColumn);
+ if (!newColumn.isDimensionColumn()) {
+ measures.add(newColumn);
+ } else if (DataTypes.isStructType(field.getDataType()) ||
+ DataTypes.isArrayType(field.getDataType()) || isComplexChild) {
--- End diff --

Done

---

[GitHub] carbondata pull request #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns...

In reply to this post by qiuchenjian-2

Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2261#discussion_r186058580

--- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala ---
@@ -761,9 +847,29 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
sql("select * from sdkOutputTable").show(false)

checkAnswer(sql("select * from sdkOutputTable"), Seq(
- Row("bob", "10", Row("abc","bang")),
- Row("bob", "10", Row("abc","bang")),
- Row("bob", "10", Row("abc","bang"))))
+ Row("bob", 10, Row("abc","bang")),
+ Row("bob", 10, Row("abc","bang")),
+ Row("bob", 10, Row("abc","bang"))))
+
+ sql("DROP TABLE sdkOutputTable")
+ // drop table should not delete the files
+ assert(new File(writerPath).exists())
--- End diff --

done

---

[GitHub] carbondata pull request #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns...

In reply to this post by qiuchenjian-2

Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2261#discussion_r186058667

--- Diff: core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java ---
@@ -169,11 +186,11 @@ private void checkRepeatColumnName(StructField field) {
throw new IllegalArgumentException("column name already exists");
}
}
- for (ColumnSchema column : otherColumns) {
- if (column.getColumnName().equalsIgnoreCase(field.getFieldName())) {
- throw new IllegalArgumentException("column name already exists");
- }
- }
+// for (ColumnSchema column : otherColumns) {
--- End diff --

done

---

[GitHub] carbondata pull request #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns...

In reply to this post by qiuchenjian-2

Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2261#discussion_r186059780

--- Diff: integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala ---
@@ -604,31 +604,58 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
tableProperties.get(CarbonCommonConstants.DICTIONARY_EXCLUDE).get.split(',').map(_.trim)
dictExcludeCols
.foreach { dictExcludeCol =>
- if (!fields.exists(x => x.column.equalsIgnoreCase(dictExcludeCol))) {
+ if (!checkFields(fields, dictExcludeCol)) {
val errormsg = "DICTIONARY_EXCLUDE column: " + dictExcludeCol +
" does not exist in table. Please check create table statement."
throw new MalformedCarbonCommandException(errormsg)
} else {
- val dataType = fields.find(x =>
- x.column.equalsIgnoreCase(dictExcludeCol)).get.dataType.get
- if (isComplexDimDictionaryExclude(dataType)) {
- val errormsg = "DICTIONARY_EXCLUDE is unsupported for complex datatype column: " +
- dictExcludeCol
- throw new MalformedCarbonCommandException(errormsg)
- } else if (!isDataTypeSupportedForDictionary_Exclude(dataType)) {
+ val dataType = findField(fields, dictExcludeCol).get.dataType.get
+ if (!isDataTypeSupportedForDictionary_Exclude(dataType)) {
val errorMsg = "DICTIONARY_EXCLUDE is unsupported for " + dataType.toLowerCase() +
" data type column: " + dictExcludeCol
throw new MalformedCarbonCommandException(errorMsg)
}
}
}
}
+
+
+ def checkFields(y: Seq[Field], colToMatch: String): Boolean = {
+ y.exists { fld =>
+ if (fld.column.equalsIgnoreCase(colToMatch)) {
+ true
+ } else if (fld.children.isDefined && fld.children.get != null) {
+ checkFields(fld.children.get, colToMatch)
+ } else {
+ false
+ }
+ }
+ }
+
+ def findField(y: Seq[Field], colToMatch: String): Option[Field] = {
--- End diff --

Done

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user sounakr commented on the issue:

https://github.com/apache/carbondata/pull/2261

Retest this please

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2261

Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/5648/

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2261

Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/4488/

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on the issue:

https://github.com/apache/carbondata/pull/2261

SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/4724/

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2261

Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/5655/

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2261

Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/5657/

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2261

Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/4497/

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on the issue:

https://github.com/apache/carbondata/pull/2261

SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/4732/

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on the issue:

https://github.com/apache/carbondata/pull/2261

SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/4734/

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user sounakr commented on the issue:

https://github.com/apache/carbondata/pull/2261

Retest this please

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user ravipesala commented on the issue:

https://github.com/apache/carbondata/pull/2261

@sounakr Building schema should be common across spark-integration and sdk, there should not be two paths. I feel it is better to refactor to use common code.

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2261

Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/5687/

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user CarbonDataQA commented on the issue:

https://github.com/apache/carbondata/pull/2261

Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/4527/

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user sounakr commented on the issue:

https://github.com/apache/carbondata/pull/2261

Retest this please.

---

[GitHub] carbondata issue #2261: [CARBONDATA-2430][SDK] Reshuffling of Columns given ...

In reply to this post by qiuchenjian-2

Github user sounakr commented on the issue:

https://github.com/apache/carbondata/pull/2261

@ravipesala Refactoring of Building schema will be done as part of a separate PR as the changes are more and can impact the CarbonTable creation.

---

1234