nihal0107 commented on a change in pull request #3819: URL: https://github.com/apache/carbondata/pull/3819#discussion_r479925640 ########## File path: sdk/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java ########## @@ -594,6 +607,446 @@ public CarbonWriterBuilder withJsonInput(Schema carbonSchema) { return this; } + private void validateCsvFiles() throws IOException { + CarbonFile[] dataFiles = this.extractDataFiles(CarbonCommonConstants.CSV_FILE_EXTENSION); + if (CollectionUtils.isEmpty(Arrays.asList(dataFiles))) { + throw new RuntimeException("CSV files can't be empty."); + } + for (CarbonFile dataFile : dataFiles) { + try { + CsvParser csvParser = SDKUtil.buildCsvParser(this.hadoopConf); + csvParser.beginParsing(FileFactory.getDataInputStream(dataFile.getPath(), + -1, this.hadoopConf)); + } catch (IllegalArgumentException ex) { + if (ex.getCause() instanceof FileNotFoundException) { + throw new FileNotFoundException("File " + dataFile + + " not found to build carbon writer."); + } + throw ex; + } + } + this.dataFiles = dataFiles; + } + + /** + * to build a {@link CarbonWriter}, which accepts loading CSV files. + * + * @param filePath absolute path under which files should be loaded. + * @return CarbonWriterBuilder + */ + public CarbonWriterBuilder withCsvPath(String filePath) throws IOException { + this.validateFilePath(filePath); + this.filePath = filePath; + this.setIsDirectory(filePath); + this.withCsvInput(); + this.validateCsvFiles(); + return this; + } + + /** + * to build a {@link CarbonWriter}, which accepts CSV files directory and + * list of file which has to be loaded. + * + * @param filePath directory where the CSV file exists. + * @param fileList list of files which has to be loaded. 
+ * @return CarbonWriterBuilder + */ + public CarbonWriterBuilder withCsvPath(String filePath, List<String> fileList) + throws IOException { + this.fileList = fileList; + this.withCsvPath(filePath); + return this; + } + + private void validateJsonFiles() throws IOException { + CarbonFile[] dataFiles = this.extractDataFiles(CarbonCommonConstants.JSON_FILE_EXTENSION); + for (CarbonFile dataFile : dataFiles) { + try { + new JSONParser().parse(SDKUtil.buildJsonReader(dataFile, this.hadoopConf)); + } catch (FileNotFoundException ex) { + throw new FileNotFoundException("File " + dataFile + " not found to build carbon writer."); + } catch (ParseException ex) { + throw new RuntimeException("File " + dataFile + " is not in json format."); + } + } + this.dataFiles = dataFiles; + } + + /** + * to build a {@link CarbonWriter}, which accepts loading JSON files. + * + * @param filePath absolute path under which files should be loaded. + * @return CarbonWriterBuilder + */ + public CarbonWriterBuilder withJsonPath(String filePath) throws IOException { + this.validateFilePath(filePath); + this.filePath = filePath; + this.setIsDirectory(filePath); + this.withJsonInput(); + this.validateJsonFiles(); + return this; + } + + /** + * to build a {@link CarbonWriter}, which accepts JSON file directory and + * list of file which has to be loaded. + * + * @param filePath directory where the json file exists. + * @param fileList list of files which has to be loaded. + * @return CarbonWriterBuilder + * @throws IOException + */ + public CarbonWriterBuilder withJsonPath(String filePath, List<String> fileList) + throws IOException { + this.fileList = fileList; + this.withJsonPath(filePath); + return this; + } + + private void validateFilePath(String filePath) { + if (StringUtils.isEmpty(filePath)) { + throw new IllegalArgumentException("filePath can not be empty"); + } + } + + /** + * to build a {@link CarbonWriter}, which accepts loading Parquet files. 
+ * + * @param filePath absolute path under which files should be loaded. + * @return CarbonWriterBuilder + */ + public CarbonWriterBuilder withParquetPath(String filePath) throws IOException { + this.validateFilePath(filePath); + this.filePath = filePath; + this.setIsDirectory(filePath); + this.writerType = WRITER_TYPE.PARQUET; + this.validateParquetFiles(); + return this; + } + + private void setIsDirectory(String filePath) { + if (this.hadoopConf == null) { + this.hadoopConf = new Configuration(FileFactory.getConfiguration()); Review comment: I checked the build() method in the same file, and hadoopConf is built the same way there. Please let me know if this is not the correct approach. ########## File path: sdk/sdk/src/main/java/org/apache/carbondata/sdk/file/utils/SDKUtil.java ########## @@ -79,4 +98,75 @@ public static ArrayList listFiles(String sourceImageFolder, return (Object[]) input[i]; } + public static List<CarbonFile> extractFilesFromFolder(String path, + String suf, Configuration hadoopConf) { + List dataFiles = listFiles(path, suf, hadoopConf); + List<CarbonFile> carbonFiles = new ArrayList<>(); + for (Object dataFile: dataFiles) { + carbonFiles.add(FileFactory.getCarbonFile(dataFile.toString(), hadoopConf)); + } + if (CollectionUtils.isEmpty(dataFiles)) { + throw new RuntimeException("No file found at given location. 
Please provide" + + "the correct folder location."); + } + return carbonFiles; + } + + public static DataFileStream<GenericData.Record> buildAvroReader(CarbonFile carbonFile, + Configuration configuration) throws IOException { + try { + GenericDatumReader<GenericData.Record> genericDatumReader = + new GenericDatumReader<>(); + DataFileStream<GenericData.Record> avroReader = + new DataFileStream<>(FileFactory.getDataInputStream(carbonFile.getPath(), + -1, configuration), genericDatumReader); + return avroReader; + } catch (FileNotFoundException ex) { + throw new FileNotFoundException("File " + carbonFile.getPath() + + " not found to build carbon writer."); + } catch (IOException ex) { + if (ex.getMessage().contains("Not a data file")) { + throw new RuntimeException("File " + carbonFile.getPath() + " is not in avro format."); + } else { + throw ex; + } + } + } + + public static Reader buildOrcReader(String path, Configuration conf) throws IOException { + try { + Reader orcReader = OrcFile.createReader(new Path(path), + OrcFile.readerOptions(conf)); + return orcReader; + } catch (FileFormatException ex) { + throw new RuntimeException("File " + path + " is not in ORC format"); + } catch (FileNotFoundException ex) { + throw new FileNotFoundException("File " + path + " not found to build carbon writer."); + } + } + + public static ParquetReader<GenericRecord> buildPqrquetReader(String path, Configuration conf) Review comment: Done ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] |
Free forum by Nabble | Edit this page |