Github user KanakaKumar commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2804#discussion_r231057030 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java --- @@ -61,14 +65,122 @@ public static Schema readSchemaInSchemaFile(String schemaFilePath) throws IOExce return new Schema(schemaList); } + /** + * get carbondata/carbonindex file in path + * + * @param path carbon file path + * @return CarbonFile array + */ + private static CarbonFile[] getCarbonFile(String path, final String extension) + throws IOException { + String dataFilePath = path; + if (!(dataFilePath.contains(extension))) { + CarbonFile[] carbonFiles = FileFactory + .getCarbonFile(path) + .listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + if (file == null) { + return false; + } + return file.getName().endsWith(extension); + } + }); + if (carbonFiles == null || carbonFiles.length < 1) { + throw new IOException("Carbon file not exists."); + } + return carbonFiles; + } + return null; + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and will not check all files schema + * + * @param path file/folder path + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path) throws IOException { + return readSchema(path, false); + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and user can decide whether check all files schema + * + * @param path file/folder path + * @param validateSchema whether check all files schema + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path, boolean validateSchema) throws IOException { + if (path.endsWith(INDEX_FILE_EXT)) { + return readSchemaFromIndexFile(path); + } else if (path.endsWith(CARBON_DATA_EXT)) { + return readSchemaFromDataFile(path); + } else if (validateSchema) { + CarbonFile[] carbonIndexFiles = getCarbonFile(path, INDEX_FILE_EXT); + Schema schema; + if (carbonIndexFiles != null && carbonIndexFiles.length != 0) { + schema = readSchemaFromIndexFile(carbonIndexFiles[0].getAbsolutePath()); + for (int i = 1; i < carbonIndexFiles.length; i++) { + Schema schema2 = readSchemaFromIndexFile(carbonIndexFiles[i].getAbsolutePath()); + if (schema != schema2) { --- End diff -- use equals .. schema.equals(schema2) --- |
In reply to this post by qiuchenjian-2
Github user KanakaKumar commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2804#discussion_r231058670 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java --- @@ -61,14 +65,122 @@ public static Schema readSchemaInSchemaFile(String schemaFilePath) throws IOExce return new Schema(schemaList); } + /** + * get carbondata/carbonindex file in path + * + * @param path carbon file path + * @return CarbonFile array + */ + private static CarbonFile[] getCarbonFile(String path, final String extension) + throws IOException { + String dataFilePath = path; + if (!(dataFilePath.contains(extension))) { + CarbonFile[] carbonFiles = FileFactory + .getCarbonFile(path) + .listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + if (file == null) { + return false; + } + return file.getName().endsWith(extension); + } + }); + if (carbonFiles == null || carbonFiles.length < 1) { + throw new IOException("Carbon file not exists."); + } + return carbonFiles; + } + return null; + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and will not check all files schema + * + * @param path file/folder path + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path) throws IOException { + return readSchema(path, false); + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and user can decide whether check all files schema + * + * @param path file/folder path + * @param validateSchema whether check all files schema + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path, boolean validateSchema) throws IOException { + if (path.endsWith(INDEX_FILE_EXT)) { + return readSchemaFromIndexFile(path); + } else if (path.endsWith(CARBON_DATA_EXT)) { + return readSchemaFromDataFile(path); + } else if (validateSchema) { + CarbonFile[] carbonIndexFiles = getCarbonFile(path, INDEX_FILE_EXT); + Schema schema; + if (carbonIndexFiles != null && carbonIndexFiles.length != 0) { + schema = readSchemaFromIndexFile(carbonIndexFiles[0].getAbsolutePath()); + for (int i = 1; i < carbonIndexFiles.length; i++) { + Schema schema2 = readSchemaFromIndexFile(carbonIndexFiles[i].getAbsolutePath()); + if (schema != schema2) { + throw new CarbonDataLoadingException("Schema is different between different files."); + } + } + CarbonFile[] carbonDataFiles = getCarbonFile(path, CARBON_DATA_EXT); + for (int i = 0; i < carbonDataFiles.length; i++) { + Schema schema2 = readSchemaFromDataFile(carbonDataFiles[i].getAbsolutePath()); + if (!schema.equals(schema2)) { + throw new CarbonDataLoadingException("Schema is different between different files."); + } + } + return schema; + } else { + throw new CarbonDataLoadingException("No carbonindex file in this path."); + } + } else { + String indexFilePath = getCarbonFile(path, INDEX_FILE_EXT)[0].getAbsolutePath(); + if (indexFilePath != null) { + return readSchemaFromIndexFile(indexFilePath); + } else { + String dataFilePath = getCarbonFile(path, CARBON_DATA_EXT)[0].getAbsolutePath(); --- End diff -- As per getCarbonFile(...) implementation, if there is no INDEX file found, it throws exception. So, there is no need of this else case ? --- |
In reply to this post by qiuchenjian-2
Github user KanakaKumar commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2804#discussion_r231059908 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java --- @@ -61,14 +65,122 @@ public static Schema readSchemaInSchemaFile(String schemaFilePath) throws IOExce return new Schema(schemaList); } + /** + * get carbondata/carbonindex file in path + * + * @param path carbon file path + * @return CarbonFile array + */ + private static CarbonFile[] getCarbonFile(String path, final String extension) + throws IOException { + String dataFilePath = path; + if (!(dataFilePath.contains(extension))) { + CarbonFile[] carbonFiles = FileFactory + .getCarbonFile(path) + .listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + if (file == null) { + return false; + } + return file.getName().endsWith(extension); + } + }); + if (carbonFiles == null || carbonFiles.length < 1) { + throw new IOException("Carbon file not exists."); + } + return carbonFiles; + } + return null; --- End diff -- We can stick to one contract from the method. Either return the list or throw exception. Generally listing APIs should not return null, if this case is not expected, we can throw exception. --- |
In reply to this post by qiuchenjian-2
Github user KanakaKumar commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2804#discussion_r231060332 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java --- @@ -144,4 +246,28 @@ public static Schema readSchemaInIndexFile(String indexFilePath) throws IOExcept } } + /** + * This method return the version details in formatted string by reading from carbondata file + * + * @param dataFilePath + * @return + * @throws IOException + */ + public static String getVersionDetails(String dataFilePath) throws IOException { --- End diff -- This complete method is displayed as removed and added again. Is it possible to avoid? --- |
In reply to this post by qiuchenjian-2
Github user KanakaKumar commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2804#discussion_r231061128 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java --- @@ -61,14 +65,122 @@ public static Schema readSchemaInSchemaFile(String schemaFilePath) throws IOExce return new Schema(schemaList); } + /** + * get carbondata/carbonindex file in path + * + * @param path carbon file path + * @return CarbonFile array + */ + private static CarbonFile[] getCarbonFile(String path, final String extension) + throws IOException { + String dataFilePath = path; + if (!(dataFilePath.contains(extension))) { + CarbonFile[] carbonFiles = FileFactory + .getCarbonFile(path) + .listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + if (file == null) { + return false; + } + return file.getName().endsWith(extension); + } + }); + if (carbonFiles == null || carbonFiles.length < 1) { + throw new IOException("Carbon file not exists."); + } + return carbonFiles; + } + return null; + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and will not check all files schema + * + * @param path file/folder path + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path) throws IOException { + return readSchema(path, false); + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and user can decide whether check all files schema + * + * @param path file/folder path + * @param validateSchema whether check all files schema + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path, boolean validateSchema) throws IOException { + if (path.endsWith(INDEX_FILE_EXT)) { + return readSchemaFromIndexFile(path); + } else if (path.endsWith(CARBON_DATA_EXT)) { + return readSchemaFromDataFile(path); + } else if (validateSchema) { + CarbonFile[] carbonIndexFiles = getCarbonFile(path, INDEX_FILE_EXT); + Schema schema; + if (carbonIndexFiles != null && carbonIndexFiles.length != 0) { + schema = readSchemaFromIndexFile(carbonIndexFiles[0].getAbsolutePath()); + for (int i = 1; i < carbonIndexFiles.length; i++) { + Schema schema2 = readSchemaFromIndexFile(carbonIndexFiles[i].getAbsolutePath()); + if (schema != schema2) { + throw new CarbonDataLoadingException("Schema is different between different files."); + } + } + CarbonFile[] carbonDataFiles = getCarbonFile(path, CARBON_DATA_EXT); + for (int i = 0; i < carbonDataFiles.length; i++) { + Schema schema2 = readSchemaFromDataFile(carbonDataFiles[i].getAbsolutePath()); + if (!schema.equals(schema2)) { + throw new CarbonDataLoadingException("Schema is different between different files."); + } + } + return schema; + } else { + throw new CarbonDataLoadingException("No carbonindex file in this path."); + } + } else { + String indexFilePath = getCarbonFile(path, INDEX_FILE_EXT)[0].getAbsolutePath(); + if (indexFilePath != null) { --- End diff -- I think this null check is not required. Is there any chance the absolute path can be null ? --- |
In reply to this post by qiuchenjian-2
Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2804#discussion_r231101081 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java --- @@ -61,14 +65,122 @@ public static Schema readSchemaInSchemaFile(String schemaFilePath) throws IOExce return new Schema(schemaList); } + /** + * get carbondata/carbonindex file in path + * + * @param path carbon file path + * @return CarbonFile array + */ + private static CarbonFile[] getCarbonFile(String path, final String extension) + throws IOException { + String dataFilePath = path; + if (!(dataFilePath.contains(extension))) { + CarbonFile[] carbonFiles = FileFactory + .getCarbonFile(path) + .listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + if (file == null) { + return false; + } + return file.getName().endsWith(extension); + } + }); + if (carbonFiles == null || carbonFiles.length < 1) { + throw new IOException("Carbon file not exists."); + } + return carbonFiles; + } + return null; + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and will not check all files schema + * + * @param path file/folder path + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path) throws IOException { + return readSchema(path, false); + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and user can decide whether check all files schema + * + * @param path file/folder path + * @param validateSchema whether check all files schema + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path, boolean validateSchema) throws IOException { + if (path.endsWith(INDEX_FILE_EXT)) { + return readSchemaFromIndexFile(path); + } else if (path.endsWith(CARBON_DATA_EXT)) { + return readSchemaFromDataFile(path); + } else if (validateSchema) { + CarbonFile[] carbonIndexFiles = getCarbonFile(path, INDEX_FILE_EXT); + Schema schema; + if (carbonIndexFiles != null && carbonIndexFiles.length != 0) { + schema = readSchemaFromIndexFile(carbonIndexFiles[0].getAbsolutePath()); + for (int i = 1; i < carbonIndexFiles.length; i++) { + Schema schema2 = readSchemaFromIndexFile(carbonIndexFiles[i].getAbsolutePath()); + if (schema != schema2) { --- End diff -- ok, done --- |
In reply to this post by qiuchenjian-2
Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2804#discussion_r231105573 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java --- @@ -61,14 +65,122 @@ public static Schema readSchemaInSchemaFile(String schemaFilePath) throws IOExce return new Schema(schemaList); } + /** + * get carbondata/carbonindex file in path + * + * @param path carbon file path + * @return CarbonFile array + */ + private static CarbonFile[] getCarbonFile(String path, final String extension) + throws IOException { + String dataFilePath = path; + if (!(dataFilePath.contains(extension))) { + CarbonFile[] carbonFiles = FileFactory + .getCarbonFile(path) + .listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + if (file == null) { + return false; + } + return file.getName().endsWith(extension); + } + }); + if (carbonFiles == null || carbonFiles.length < 1) { + throw new IOException("Carbon file not exists."); + } + return carbonFiles; + } + return null; + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and will not check all files schema + * + * @param path file/folder path + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path) throws IOException { + return readSchema(path, false); + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and user can decide whether check all files schema + * + * @param path file/folder path + * @param validateSchema whether check all files schema + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path, boolean validateSchema) throws IOException { + if (path.endsWith(INDEX_FILE_EXT)) { + return readSchemaFromIndexFile(path); + } else if (path.endsWith(CARBON_DATA_EXT)) { + return readSchemaFromDataFile(path); + } else if (validateSchema) { + CarbonFile[] carbonIndexFiles = getCarbonFile(path, INDEX_FILE_EXT); + Schema schema; + if (carbonIndexFiles != null && carbonIndexFiles.length != 0) { + schema = readSchemaFromIndexFile(carbonIndexFiles[0].getAbsolutePath()); + for (int i = 1; i < carbonIndexFiles.length; i++) { + Schema schema2 = readSchemaFromIndexFile(carbonIndexFiles[i].getAbsolutePath()); + if (schema != schema2) { + throw new CarbonDataLoadingException("Schema is different between different files."); + } + } + CarbonFile[] carbonDataFiles = getCarbonFile(path, CARBON_DATA_EXT); + for (int i = 0; i < carbonDataFiles.length; i++) { + Schema schema2 = readSchemaFromDataFile(carbonDataFiles[i].getAbsolutePath()); + if (!schema.equals(schema2)) { + throw new CarbonDataLoadingException("Schema is different between different files."); + } + } + return schema; + } else { + throw new CarbonDataLoadingException("No carbonindex file in this path."); + } + } else { + String indexFilePath = getCarbonFile(path, INDEX_FILE_EXT)[0].getAbsolutePath(); + if (indexFilePath != null) { + return readSchemaFromIndexFile(indexFilePath); + } else { + String dataFilePath = getCarbonFile(path, CARBON_DATA_EXT)[0].getAbsolutePath(); --- End diff -- yeah, removed else --- |
In reply to this post by qiuchenjian-2
Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2804#discussion_r231107288 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java --- @@ -61,14 +65,122 @@ public static Schema readSchemaInSchemaFile(String schemaFilePath) throws IOExce return new Schema(schemaList); } + /** + * get carbondata/carbonindex file in path + * + * @param path carbon file path + * @return CarbonFile array + */ + private static CarbonFile[] getCarbonFile(String path, final String extension) + throws IOException { + String dataFilePath = path; + if (!(dataFilePath.contains(extension))) { + CarbonFile[] carbonFiles = FileFactory + .getCarbonFile(path) + .listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + if (file == null) { + return false; + } + return file.getName().endsWith(extension); + } + }); + if (carbonFiles == null || carbonFiles.length < 1) { + throw new IOException("Carbon file not exists."); + } + return carbonFiles; + } + return null; --- End diff -- ok, throw exception --- |
In reply to this post by qiuchenjian-2
Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2804#discussion_r231107900 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java --- @@ -144,4 +246,28 @@ public static Schema readSchemaInIndexFile(String indexFilePath) throws IOExcept } } + /** + * This method return the version details in formatted string by reading from carbondata file + * + * @param dataFilePath + * @return + * @throws IOException + */ + public static String getVersionDetails(String dataFilePath) throws IOException { --- End diff -- can't --- |
In reply to this post by qiuchenjian-2
Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2804#discussion_r231108134 --- Diff: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java --- @@ -61,14 +65,122 @@ public static Schema readSchemaInSchemaFile(String schemaFilePath) throws IOExce return new Schema(schemaList); } + /** + * get carbondata/carbonindex file in path + * + * @param path carbon file path + * @return CarbonFile array + */ + private static CarbonFile[] getCarbonFile(String path, final String extension) + throws IOException { + String dataFilePath = path; + if (!(dataFilePath.contains(extension))) { + CarbonFile[] carbonFiles = FileFactory + .getCarbonFile(path) + .listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + if (file == null) { + return false; + } + return file.getName().endsWith(extension); + } + }); + if (carbonFiles == null || carbonFiles.length < 1) { + throw new IOException("Carbon file not exists."); + } + return carbonFiles; + } + return null; + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and will not check all files schema + * + * @param path file/folder path + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path) throws IOException { + return readSchema(path, false); + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and user can decide whether check all files schema + * + * @param path file/folder path + * @param validateSchema whether check all files schema + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path, boolean validateSchema) throws IOException { + if (path.endsWith(INDEX_FILE_EXT)) { + return readSchemaFromIndexFile(path); + } else if (path.endsWith(CARBON_DATA_EXT)) { + return readSchemaFromDataFile(path); + } else if (validateSchema) { + CarbonFile[] carbonIndexFiles = getCarbonFile(path, INDEX_FILE_EXT); + Schema schema; + if (carbonIndexFiles != null && carbonIndexFiles.length != 0) { + schema = readSchemaFromIndexFile(carbonIndexFiles[0].getAbsolutePath()); + for (int i = 1; i < carbonIndexFiles.length; i++) { + Schema schema2 = readSchemaFromIndexFile(carbonIndexFiles[i].getAbsolutePath()); + if (schema != schema2) { + throw new CarbonDataLoadingException("Schema is different between different files."); + } + } + CarbonFile[] carbonDataFiles = getCarbonFile(path, CARBON_DATA_EXT); + for (int i = 0; i < carbonDataFiles.length; i++) { + Schema schema2 = readSchemaFromDataFile(carbonDataFiles[i].getAbsolutePath()); + if (!schema.equals(schema2)) { + throw new CarbonDataLoadingException("Schema is different between different files."); + } + } + return schema; + } else { + throw new CarbonDataLoadingException("No carbonindex file in this path."); + } + } else { + String indexFilePath = getCarbonFile(path, INDEX_FILE_EXT)[0].getAbsolutePath(); + if (indexFilePath != null) { --- End diff -- removed. --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2804 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1531/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2804 Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9578/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2804 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1320/ --- |
In reply to this post by qiuchenjian-2
Github user xubo245 commented on the issue:
https://github.com/apache/carbondata/pull/2804 @KanakaKumar CI pass, please check it. --- |
In reply to this post by qiuchenjian-2
Github user KanakaKumar commented on the issue:
https://github.com/apache/carbondata/pull/2804 LGTM --- |
In reply to this post by qiuchenjian-2
|
In reply to this post by qiuchenjian-2
|
Free forum by Nabble | Edit this page |