[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3196: [CARBONDATA-3366] Support SDK reader to read blocklet level split

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3196: [CARBONDATA-3366] Support SDK reader to read blocklet level split

GitBox
ajantha-bhat commented on a change in pull request #3196: [CARBONDATA-3366] Support SDK reader to read blocklet level split
URL: https://github.com/apache/carbondata/pull/3196#discussion_r281540029
 
 

 ##########
 File path: store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
 ##########
 @@ -228,4 +228,118 @@ public CarbonReaderBuilder withRowRecordReader() {
     }
   }
 
+  /**
+   * gets an array of blocklet level CarbonInputSplits
+   * @return
+   * @throws IOException
+   */
+  public InputSplit[] getSplits() throws IOException {
+    if (hadoopConf == null) {
+      hadoopConf = FileFactory.getConfiguration();
+    }
+    CarbonTable table;
+    table = CarbonTable.buildTable(tablePath, tableName, hadoopConf);
+    final CarbonFileInputFormat format = new CarbonFileInputFormat();
+    final Job job = new Job(hadoopConf);
+    // set cache level to blockletlevel
+    Map<String, String> tableProperties = table.getTableInfo().getFactTable().getTableProperties();
+    tableProperties.put(CarbonCommonConstants.CACHE_LEVEL,"BLOCKLET");
+    table.getTableInfo().getFactTable().setTableProperties(tableProperties);
+    format.setTableInfo(job.getConfiguration(), table.getTableInfo());
+    format.setTablePath(job.getConfiguration(), table.getTablePath());
+    format.setTableName(job.getConfiguration(), table.getTableName());
+    format.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
+    if (filterExpression != null) {
+      format.setFilterPredicates(job.getConfiguration(), filterExpression);
+    }
+    if (projectionColumns != null) {
+      // set the user projection
+      int len = projectionColumns.length;
+      for (int i = 0; i < len; i++) {
+        if (projectionColumns[i].contains(".")) {
+          throw new UnsupportedOperationException(
+              "Complex child columns projection NOT supported through CarbonReader");
+        }
+      }
+      format.setColumnProjection(job.getConfiguration(), projectionColumns);
+    }
+    List<InputSplit> splits =
+        format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
+    return splits.toArray(new InputSplit[splits.size()]);
+  }
+
+  /**
+   * build the carbon reader with specified split.
+   * @param inputSplit
+   * @param <T>
+   * @return
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public <T> CarbonReader<T> buildWithSplits(InputSplit inputSplit)
+      throws IOException, InterruptedException {
+    if (hadoopConf == null) {
+      hadoopConf = FileFactory.getConfiguration();
+    }
+    CarbonTable table;
+    // now always infer schema. TODO:Refactor in next version.
+    table = CarbonTable.buildTable(tablePath, tableName, hadoopConf);
+    final CarbonFileInputFormat format = new CarbonFileInputFormat();
+    final Job job = new Job(hadoopConf);
+    format.setTableInfo(job.getConfiguration(), table.getTableInfo());
+    format.setTablePath(job.getConfiguration(), table.getTablePath());
 
 Review comment:
   It was difficult to extract, but it is completed now.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[hidden email]


With regards,
Apache Git Services