[jira] [Commented] (CARBONDATA-3248) Spark carbon file format can't read transactional table segment path

Classic list | Threaded
1 message | Options
Reply | Threaded
Open this post in threaded view
|

[jira] [Commented] (CARBONDATA-3248) Spark carbon file format can't read transactional table segment path

Akash R Nilugal (Jira)

    [ https://issues.apache.org/jira/browse/CARBONDATA-3248?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16741834#comment-16741834 ]

xubo245 commented on CARBONDATA-3248:
-------------------------------------

LOAD DATA is not supported for datasource tables (i.e. tables created with `USING carbondata`), which is why the first snippet below expects an exception:

> Spark carbon file format can't read transactional table segment path
> --------------------------------------------------------------------
>
>                 Key: CARBONDATA-3248
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-3248
>             Project: CarbonData
>          Issue Type: Improvement
>            Reporter: xubo245
>            Priority: Major
>
> Code:
> {code:java}
>         val tableNameForAllTypeOriginal = "alluxio_table_all_type_original"
>         val tableNameForAllType = "alluxio_table_all_type"
>         try {
>             sql("DROP TABLE IF EXISTS " + tableNameForAllTypeOriginal)
>             sql(
>                 s"""create table $tableNameForAllTypeOriginal(
>                    | smallIntField SMALLINT,
>                    | intField INT,
>                    | bigIntField BIGINT,
>                    | floatField FLOAT,
>                    | doubleField DOUBLE,
>                    | decimalField DECIMAL(25, 4),
>                    | timestampField TIMESTAMP,
>                    | dateField DATE,
>                    | stringField STRING,
>                    | varcharField VARCHAR(10),
>                    | charField CHAR(10),
>                    | arrayField ARRAY<string>,
>                    | structField STRUCT<col1:STRING, col2:STRING, col3:STRING>,
>                    | booleanField BOOLEAN)
>                    | using carbondata
>              """.stripMargin)
>             val path = localAlluxioCluster.getMasterURI + allDataTypeRemote
>             try {
>                 sql(s"LOAD DATA LOCAL INPATH '$path' INTO TABLE $tableNameForAllTypeOriginal " +
>                         "options('COMPLEX_DELIMITER_LEVEL_1'='$','COMPLEX_DELIMITER_LEVEL_2'=':')")
>                 sql(s"select * from $tableNameForAllTypeOriginal").show()
>                 assert(false)
>                 // Don't support like that, TODO: to analysis whether can support it
>             } catch {
>                 case e: Exception =>
> //                    e.printStackTrace()
>                     assert(true)
>             } finally {
>                 sql("DROP TABLE IF EXISTS " + tableNameForAllTypeOriginal)
>             }
>             println("\n\n\n\n")
>             sql("DROP TABLE IF EXISTS " + tableNameForAllType)
>             sql("DROP TABLE IF EXISTS " + tableNameForAllTypeOriginal)
>             sql(
>                 s"""create table $tableNameForAllTypeOriginal(
>                    | smallIntField SMALLINT,
>                    | intField INT,
>                    | bigIntField BIGINT,
>                    | floatField FLOAT,
>                    | doubleField DOUBLE,
>                    | decimalField DECIMAL(25, 4),
>                    | timestampField TIMESTAMP,
>                    | dateField DATE,
>                    | stringField STRING,
>                    | varcharField VARCHAR(10),
>                    | charField CHAR(10),
>                    | arrayField ARRAY<string>,
>                    | structField STRUCT<col1:STRING, col2:STRING, col3:STRING>,
>                    | booleanField BOOLEAN)
>                    | stored by 'carbondata'
>              """.stripMargin)
>             sql(s"LOAD DATA LOCAL INPATH '$path' INTO TABLE $tableNameForAllTypeOriginal " +
>                     "options('COMPLEX_DELIMITER_LEVEL_1'='$','COMPLEX_DELIMITER_LEVEL_2'=':')")
>             fileSystemShell.run("ls", carbonAndAlluxio + "/default")
>             val externalTablePath = localAlluxioCluster.getMasterURI + carbonAndAlluxio + "/default/" + tableNameForAllTypeOriginal + "/Fact/Part0/Segment_0"
>             fileSystemShell.run("ls",externalTablePath)
>             sql(s"CREATE TABLE $tableNameForAllType using carbon" +
>                     s" LOCATION '$externalTablePath'")
> {code}
> Exception:
> {code:java}
> 2019-01-14 15:09:10 AUDIT audit:93 - {"time":"January 13, 2019 11:09:10 PM PST","username":"xubo","opName":"CREATE TABLE","opId":"15248775671301","opStatus":"SUCCESS","opTime":"140 ms","table":"default.alluxio_table_all_type_original","extraInfo":{"bad_record_path":"","local_dictionary_enable":"true","external":"false","sort_columns":"","comment":""}}
> 2019-01-14 15:09:10 AUDIT audit:72 - {"time":"January 13, 2019 11:09:10 PM PST","username":"xubo","opName":"LOAD DATA","opId":"15248921660444","opStatus":"START"}
> 2019-01-14 15:09:10 AUDIT audit:93 - {"time":"January 13, 2019 11:09:10 PM PST","username":"xubo","opName":"LOAD DATA","opId":"15248921660444","opStatus":"SUCCESS","opTime":"511 ms","table":"default.alluxio_table_all_type_original","extraInfo":{"SegmentId":"0","DataSize":"5.07KB","IndexSize":"2.48KB"}}
> drwxr-xr-x xubo           staff                        3       PERSISTED 01-13-2019 23:09:10:129  DIR /CarbonAndAlluxio/default/alluxio_table_all_type_original
> -rw-r--r-- xubo           staff                     2588       PERSISTED 01-13-2019 23:09:10:498 100% /CarbonAndAlluxio/default/alluxio_table_all_type_original/Fact/Part0/Segment_0/0_1547449750488.carbonindexmerge
> -rw-r--r-- xubo           staff                     5187       PERSISTED 01-13-2019 23:09:10:303   0% /CarbonAndAlluxio/default/alluxio_table_all_type_original/Fact/Part0/Segment_0/part-0-0_batchno0-0-0-1547449750082.carbondata
> 2019-01-14 15:09:10 ERROR AbstractQueryExecutor:280 - Schema of alluxio://xubodembp:52203/CarbonAndAlluxio/default/alluxio_table_all_type_original/Fact/Part0/Segment_0/part-0-0_batchno0-0-0-1547449750082.carbondata doesn't match with the table's schema
> 2019-01-14 15:09:10 ERROR Executor:91 - Exception in task 0.0 in stage 5.0 (TID 5)
> java.io.IOException: All the files doesn't have same schema. Unsupported operation on nonTransactional table. Check logs.
> at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.updateColumns(AbstractQueryExecutor.java:281)
> at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:234)
> at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138)
> at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:406)
> at org.apache.carbondata.core.scan.executor.impl.DetailQueryExecutor.execute(DetailQueryExecutor.java:47)
> at org.apache.carbondata.hadoop.CarbonRecordReader.initialize(CarbonRecordReader.java:112)
> at org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:427)
> at org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:381)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:124)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:174)
> at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:105)
> at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
> at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
> at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:395)
> at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:234)
> at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:228)
> at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
> at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
> at org.apache.spark.scheduler.Task.run(Task.scala:108)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> 2019-01-14 15:09:10 ERROR TaskSetManager:70 - Task 0 in stage 5.0 failed 1 times; aborting job
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)