[jira] [Commented] (CARBONDATA-1650) load data into hive table fails

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[jira] [Commented] (CARBONDATA-1650) load data into hive table fails

Akash R Nilugal (Jira)

    [ https://issues.apache.org/jira/browse/CARBONDATA-1650?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16222115#comment-16222115 ]

xujie commented on CARBONDATA-1650:
-----------------------------------


scala> spark.sql("desc formatted target_table").show
+--------------------+--------------------+-------+
|            col_name|           data_type|comment|
+--------------------+--------------------+-------+
|                  id|                 int|   null|
|                name|              string|   null|
|                city|              string|   null|
|                    |                    |       |
|# Detailed Table ...|                    |       |
|           Database:|             default|       |
|              Owner:|              hadoop|       |
|        Create Time:|Wed Oct 25 14:20:...|       |
|   Last Access Time:|Thu Jan 01 08:00:...|       |
|           Location:|hdfs://namenodeb:...|       |
|         Table Type:|             MANAGED|       |
|   Table Parameters:|                    |       |
|         rawDataSize|                  -1|       |
|            numFiles|                   0|       |
|  transient_lastD...|          1509091869|       |
|  last_modified_time|          1509088050|       |
|    last_modified_by|              hadoop|       |
|           totalSize|                   0|       |
|  COLUMN_STATS_AC...|               false|       |
|             numRows|                  -1|       |

> load data into hive table fails
> ------------------------------
>
>                 Key: CARBONDATA-1650
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-1650
>             Project: CarbonData
>          Issue Type: Bug
>          Components: hive-integration
>    Affects Versions: 1.2.0
>         Environment: hive.version:1.1.0-cdh5.10.0
> hadoop:version:2.6.0-cdh5.10.0
>            Reporter: xujie
>            Priority: Critical
>
> import org.apache.spark.sql.SparkSession
> import org.apache.spark.sql.CarbonSession._
> val rootPath = "hdfs://namenodeb:8020/app/carbondata"
> val storeLocation = s"$rootPath/store"
> val warehouse = s"$rootPath/warehouse"
> val metastoredb = s"$rootPath/metastore_db"
> val carbon = SparkSession.builder().enableHiveSupport().config("spark.sql.warehouse.dir", warehouse).config(org.apache.carbondata.core.constants.CarbonCommonConstants.STORE_LOCATION, storeLocation).getOrCreateCarbonSession(storeLocation, metastoredb)
> import org.apache.spark.sql.types._
> import org.apache.spark.sql.Row
> val rdd = sc.textFile("/data/home/hadoop/test.txt");
> val schemaString = "id name city"
> val fields = schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, nullable = true))
> val schema = StructType(fields)
> val rowRDD = rdd.map(_.split(",")).map(attributes => Row(attributes(0),attributes(1),attributes(2)))
> val peopleDF = spark.createDataFrame(rowRDD, schema)
> peopleDF.createOrReplaceTempView("tmp_table")
> spark.sql("insert into target_table SELECT * FROM tmp_table")
> java.lang.RuntimeException: Failed to add entry in table status for default.target_table
>   at scala.sys.package$.error(package.scala:27)
>   at org.apache.carbondata.spark.util.CommonUtil$.readAndUpdateLoadProgressInTableMeta(CommonUtil.scala:533)
>   at org.apache.spark.sql.execution.command.LoadTable.processData(carbonTableSchema.scala:928)
>   at org.apache.spark.sql.execution.command.LoadTable.run(carbonTableSchema.scala:754)
>   at org.apache.spark.sql.execution.command.LoadTableByInsert.processData(carbonTableSchema.scala:651)
>   at org.apache.spark.sql.execution.command.LoadTableByInsert.run(carbonTableSchema.scala:637)
>   at org.apache.spark.sql.CarbonDatasourceHadoopRelation.insert(CarbonDatasourceHadoopRelation.scala:98)
>   at org.apache.spark.sql.execution.datasources.InsertIntoDataSourceCommand.run(InsertIntoDataSourceCommand.scala:43)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
>   at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
>   at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
>   at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
>   at org.apache.spark.sql.Dataset.<init>(Dataset.scala:185)
>   at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
>   at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:592)
>   ... 52 elided



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)