[GitHub] [carbondata] kumarvishal09 commented on a change in pull request #3177: [CARBONDATA-3337][CARBONDATA-3306] Distributed index server

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[GitHub] [carbondata] kumarvishal09 commented on a change in pull request #3177: [CARBONDATA-3337][CARBONDATA-3306] Distributed index server

GitBox
kumarvishal09 commented on a change in pull request #3177: [CARBONDATA-3337][CARBONDATA-3306] Distributed index server
URL: https://github.com/apache/carbondata/pull/3177#discussion_r280716281
 
 

 ##########
 File path: core/src/main/java/org/apache/carbondata/core/datamap/DataMapUtil.java
 ##########
 @@ -91,43 +102,105 @@ public static DataMapJob getDataMapJob(Configuration configuration) throws IOExc
    * @param carbonTable
    * @throws IOException
    */
-  public static void executeDataMapJobForClearingDataMaps(CarbonTable carbonTable)
+  private static void executeClearDataMapJob(DataMapJob dataMapJob,
+      CarbonTable carbonTable, String dataMapToClear) throws IOException {
+    SegmentStatusManager.ValidAndInvalidSegmentsInfo validAndInvalidSegmentsInfo =
+            getValidAndInvalidSegments(carbonTable, FileFactory.getConfiguration());
+    List<String> invalidSegment = new ArrayList<>();
+    for (Segment segment : validAndInvalidSegmentsInfo.getInvalidSegments()) {
+      invalidSegment.add(segment.getSegmentNo());
+    }
+    DistributableDataMapFormat dataMapFormat = new DistributableDataMapFormat(carbonTable,
+        validAndInvalidSegmentsInfo.getValidSegments(), invalidSegment, true,
+        dataMapToClear);
+    dataMapJob.execute(dataMapFormat);
+  }
+
+  public static void executeClearDataMapJob(CarbonTable carbonTable, String jobClassName)
       throws IOException {
-    String dataMapJobClassName = "org.apache.carbondata.spark.rdd.SparkDataMapJob";
-    DataMapJob dataMapJob = (DataMapJob) createDataMapJob(dataMapJobClassName);
+    executeClearDataMapJob(carbonTable, jobClassName, "");
+  }
+
+  static void executeClearDataMapJob(CarbonTable carbonTable, String jobClassName,
+      String dataMapToClear) throws IOException {
+    DataMapJob dataMapJob = (DataMapJob) createDataMapJob(jobClassName);
     if (dataMapJob == null) {
       return;
     }
-    String className = "org.apache.carbondata.core.datamap.DistributableDataMapFormat";
-    SegmentStatusManager.ValidAndInvalidSegmentsInfo validAndInvalidSegmentsInfo =
-        getValidAndInvalidSegments(carbonTable, FileFactory.getConfiguration());
-    List<Segment> validSegments = validAndInvalidSegmentsInfo.getValidSegments();
-    List<Segment> invalidSegments = validAndInvalidSegmentsInfo.getInvalidSegments();
-    DataMapExprWrapper dataMapExprWrapper = null;
-    if (DataMapStoreManager.getInstance().getAllDataMap(carbonTable).size() > 0) {
-      DataMapChooser dataMapChooser = new DataMapChooser(carbonTable);
-      dataMapExprWrapper = dataMapChooser.getAllDataMapsForClear(carbonTable);
-    } else {
-      return;
+    executeClearDataMapJob(dataMapJob, carbonTable, dataMapToClear);
+  }
+
+  public static DataMapJob getEmbeddedJob() {
+    DataMapJob dataMapJob = (DataMapJob) DataMapUtil.createDataMapJob(EMBEDDED_JOB_NAME);
+    if (dataMapJob == null) {
+      throw new ExceptionInInitializerError("Unable to create EmbeddedDataMapJob");
     }
-    DistributableDataMapFormat dataMapFormat =
-        createDataMapJob(carbonTable, dataMapExprWrapper, validSegments, invalidSegments, null,
-            className, true);
-    dataMapJob.execute(dataMapFormat, null);
+    return dataMapJob;
   }
 
-  private static DistributableDataMapFormat createDataMapJob(CarbonTable carbonTable,
-      DataMapExprWrapper dataMapExprWrapper, List<Segment> validsegments,
-      List<Segment> invalidSegments, List<PartitionSpec> partitionsToPrune, String clsName,
-      boolean isJobToClearDataMaps) {
-    try {
-      Constructor<?> cons = Class.forName(clsName).getDeclaredConstructors()[0];
-      return (DistributableDataMapFormat) cons
-          .newInstance(carbonTable, dataMapExprWrapper, validsegments, invalidSegments,
-              partitionsToPrune, isJobToClearDataMaps);
-    } catch (Exception e) {
-      throw new RuntimeException(e);
+  /**
+   * Prune the segments from the already pruned blocklets.
+   */
+  public static void pruneSegments(List<Segment> segments, List<ExtendedBlocklet> prunedBlocklets) {
+    Set<Segment> validSegments = new HashSet<>();
+    for (ExtendedBlocklet blocklet : prunedBlocklets) {
+      // Clear the old pruned index files if any present
+      blocklet.getSegment().getFilteredIndexShardNames().clear();
+      // Set the pruned index file to the segment
+      // for further pruning.
+      String shardName = CarbonTablePath.getShardName(blocklet.getFilePath());
+      blocklet.getSegment().setFilteredIndexShardName(shardName);
+      validSegments.add(blocklet.getSegment());
     }
+    segments.retainAll(validSegments);
 
 Review comment:
   `retainAll` will compare each element present in the valid-segments set, which internally calls `Segment.equals` for every element. Can we replace this code with the following instead?
   segments.clear();
   segments.addAll(validSegments);

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[hidden email]


With regards,
Apache Git Services