[jira] [Created] (CARBONDATA-3300) ClassNotFoundException when using UDF on spark-shell

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[jira] [Created] (CARBONDATA-3300) ClassNotFoundException when using UDF on spark-shell

Akash R Nilugal (Jira)
Kunal Kapoor created CARBONDATA-3300:
----------------------------------------

             Summary: ClassNotFoundException when using UDF on spark-shell
                 Key: CARBONDATA-3300
                 URL: https://issues.apache.org/jira/browse/CARBONDATA-3300
             Project: CarbonData
          Issue Type: New Feature
            Reporter: Kunal Kapoor
            Assignee: Kunal Kapoor


1. Create table x1 using the following SQL:
create table x1 (imei string, deviceInformationId int, mac string, productdate timestamp, updatetime timestamp, gamePointId double, contractNumber double) STORED BY 'org.apache.carbondata.format';

Load the data into x1:
LOAD DATA inpath 'hdfs://localhost/x1_without_header.csv' into table x1 options('DELIMITER'=',', 'QUOTECHAR'='"','FILEHEADER'='imei, deviceinformationid,mac, productdate,updatetime, gamepointid,contractnumber');

Create another table, res_1, using the following SQL:
create table res_1 as select * from x1 limit 2;

2. Log in to spark-shell, register the UDF, and run the join query:

import java.sql.Date
import java.sql.Timestamp;
spark.udf.register("castTimestampToDate", (x: Timestamp) =>
  try {
    Some(new Date(x.getTime - x.toLocalDateTime.getHour * 3600 * 1000L - x.toLocalDateTime.getMinute * 60 * 1000L - x.toLocalDateTime.getSecond * 1000L))
  } catch {
    case _: Exception => None
  }
)

spark.sql("select res_1.* from x1, res_1 where castTimestampToDate(x1.productdate) = castTimestampToDate(res_1.productdate) and x1.deviceInformationId = res_1.deviceInformationId").show(false)

 

java.lang.RuntimeException: Error while reading filter expression
  at org.apache.carbondata.hadoop.api.CarbonInputFormat.getFilterPredicates(CarbonInputFormat.java:392)
  at org.apache.carbondata.hadoop.api.CarbonTableInputFormat.getSplits(CarbonTableInputFormat.java:204)
  at org.apache.carbondata.spark.rdd.CarbonScanRDD.internalGetPartitions(CarbonScanRDD.scala:139)
  at org.apache.carbondata.spark.rdd.CarbonRDD.getPartitions(CarbonRDD.scala:66)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
  at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
  at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
  at scala.Option.getOrElse(Option.scala:121)
  at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
  at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:340)
  at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3278)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2489)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2489)
  at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3259)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
  at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3258)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:2489)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2703)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:254)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:725)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:702)
  ... 49 elided
Caused by: java.io.IOException: Could not read object
  at org.apache.carbondata.core.util.ObjectSerializationUtil.convertStringToObject(ObjectSerializationUtil.java:100)
  at org.apache.carbondata.hadoop.api.CarbonInputFormat.getFilterPredicates(CarbonInputFormat.java:389)
  ... 84 more
Caused by: java.lang.ClassNotFoundException: $anonfun$1
  at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
  at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
  at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:338)
  at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
  at java.lang.Class.forName0(Native Method)
  at java.lang.Class.forName(Class.java:348)
  at java.io.ObjectInputStream.resolveClass(ObjectInputStream.java:682)
  at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1859)
  at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1745)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2033)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
  at java.io.ObjectInputStream.readObject(ObjectInputStream.java:427)
  at java.util.ArrayList.readObject(ArrayList.java:797)
  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:498)
  at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1158)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
  at java.io.ObjectInputStream.readObject(ObjectInputStream.java:427)
  at java.util.ArrayList.readObject(ArrayList.java:797)
  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:498)
  at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1158)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
  at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2278)
  at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2202)
  at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2060)
  at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1567)
  at java.io.ObjectInputStream.readObject(ObjectInputStream.java:427)
  at org.apache.carbondata.core.util.ObjectSerializationUtil.convertStringToObject(ObjectSerializationUtil.java:98)
  ... 85 more



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)