Babulal created CARBONDATA-2925:
-----------------------------------
Summary: Wrong data displayed for Spark file format if carbon file has multiple blocklets
Key: CARBONDATA-2925
URL: https://issues.apache.org/jira/browse/CARBONDATA-2925
Project: CarbonData
Issue Type: Bug
Reporter: Babulal
// LoadData
def loadData(spark: SparkSession): Unit ={
spark.experimental.extraOptimizations=Seq(new CarbonFileIndexReplaceRule())
val fields=new Array[Field](8)
fields(0)=new Field("myid",DataTypes.INT);
fields(1)=new Field("event_id",DataTypes.STRING);
fields(2)=new Field("eve_time",DataTypes.DATE);
fields(3)=new Field("ingestion_time",DataTypes.TIMESTAMP);
fields(4)=new Field("alldate",DataTypes.createArrayType(DataTypes.DATE));
fields(5)=new Field("subject",DataTypes.STRING);
fields(6)=new Field("from_email",DataTypes.STRING);
fields(7)=new Field("sal",DataTypes.DOUBLE);
import scala.collection.JavaConverters._
val options=Map("bad_records_action"->"FORCE","complex_delimiter_level_1"->"$").asJava
val writer=CarbonWriter.builder().outputPath("D:/data/sdkpath/1").sortBy(Array("myid","ingestion_time","event_id")).withLoadOptions(options).buildWriterForCSVInput(new Schema(fields))
val timeF=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
val date_F=new SimpleDateFormat("yyyy-MM-dd")
for(i<-0 to 5000000){
val time=new Date(System.currentTimeMillis())
writer.write(Array(""+i,"event_"+i,""+date_F.format(time),""+timeF.format(time),""+date_F.format(time)+"$"+date_F.format(time),"Subject_0","FromEmail",""+new Random().nextDouble()))
}
writer.close()
} // Query def queryUsingFormat(spark: SparkSession): Unit ={
spark.experimental.extraOptimizations=Seq(new CarbonFileIndexReplaceRule())
val df= spark.read.format("carbon").load("D:/data/sdkpath/1")
println("==============================="+df.count())
}
Actual result: the count returned is 2496000.
Expected result: 5000001.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)