[GitHub] [carbondata] kunal642 commented on a change in pull request #3177: [CARBONDATA-3337][CARBONDATA-3306] Distributed index server

Classic list | Threaded view
1 message | Options
Reply | Threaded
Open this post in threaded view
|

[GitHub] [carbondata] kunal642 commented on a change in pull request #3177: [CARBONDATA-3337][CARBONDATA-3306] Distributed index server

GitBox
kunal642 commented on a change in pull request #3177: [CARBONDATA-3337][CARBONDATA-3306] Distributed index server
URL: https://github.com/apache/carbondata/pull/3177#discussion_r281009505
 
 

 ##########
 File path: core/src/main/java/org/apache/carbondata/core/datamap/DataMapUtil.java
 ##########
 @@ -91,43 +94,132 @@ public static DataMapJob getDataMapJob(Configuration configuration) throws IOExc
    * @param carbonTable
    * @throws IOException
    */
-  public static void executeDataMapJobForClearingDataMaps(CarbonTable carbonTable)
-      throws IOException {
-    String dataMapJobClassName = "org.apache.carbondata.spark.rdd.SparkDataMapJob";
+  private static void executeDataMapJobForClearingSegmentDataMaps(DataMapJob dataMapJob,
+      CarbonTable carbonTable) throws IOException {
+    SegmentStatusManager.ValidAndInvalidSegmentsInfo validAndInvalidSegmentsInfo =
+            getValidAndInvalidSegments(carbonTable, FileFactory.getConfiguration());
+    List<String> invalidSegment = new ArrayList<>();
+    for (Segment segment : validAndInvalidSegmentsInfo.getInvalidSegments()) {
+      invalidSegment.add(segment.getSegmentNo());
+    }
+    DistributableDataMapFormat dataMapFormat = new DistributableDataMapFormat(carbonTable, null,
+        validAndInvalidSegmentsInfo.getValidSegments(), invalidSegment, null, true,
+        DataMapLevel.FG);
+    dataMapJob.execute(dataMapFormat);
+  }
+
+  public static void clearEmbeddedDataMaps(CarbonTable carbonTable) throws IOException {
+    String dataMapJobClassName = "org.apache.carbondata.indexserver.EmbeddedDataMapJob";
     DataMapJob dataMapJob = (DataMapJob) createDataMapJob(dataMapJobClassName);
     if (dataMapJob == null) {
       return;
     }
-    String className = "org.apache.carbondata.core.datamap.DistributableDataMapFormat";
-    SegmentStatusManager.ValidAndInvalidSegmentsInfo validAndInvalidSegmentsInfo =
-        getValidAndInvalidSegments(carbonTable, FileFactory.getConfiguration());
-    List<Segment> validSegments = validAndInvalidSegmentsInfo.getValidSegments();
-    List<Segment> invalidSegments = validAndInvalidSegmentsInfo.getInvalidSegments();
-    DataMapExprWrapper dataMapExprWrapper = null;
-    if (DataMapStoreManager.getInstance().getAllDataMap(carbonTable).size() > 0) {
-      DataMapChooser dataMapChooser = new DataMapChooser(carbonTable);
-      dataMapExprWrapper = dataMapChooser.getAllDataMapsForClear(carbonTable);
-    } else {
+    executeDataMapJobForClearingSegmentDataMaps(dataMapJob, carbonTable);
+  }
+
+  public static void clearDistributedDataMaps(CarbonTable carbonTable)
+      throws IOException {
+    String dataMapJobClassName = "org.apache.carbondata.indexserver.DistributedClearCacheJob";
+    DataMapJob dataMapJob = (DataMapJob) createDataMapJob(dataMapJobClassName);
+    if (dataMapJob == null) {
       return;
     }
-    DistributableDataMapFormat dataMapFormat =
-        createDataMapJob(carbonTable, dataMapExprWrapper, validSegments, invalidSegments, null,
-            className, true);
-    dataMapJob.execute(dataMapFormat, null);
+    executeDataMapJobForClearingSegmentDataMaps(dataMapJob, carbonTable);
   }
 
-  private static DistributableDataMapFormat createDataMapJob(CarbonTable carbonTable,
-      DataMapExprWrapper dataMapExprWrapper, List<Segment> validsegments,
-      List<Segment> invalidSegments, List<PartitionSpec> partitionsToPrune, String clsName,
-      boolean isJobToClearDataMaps) {
-    try {
-      Constructor<?> cons = Class.forName(clsName).getDeclaredConstructors()[0];
-      return (DistributableDataMapFormat) cons
-          .newInstance(carbonTable, dataMapExprWrapper, validsegments, invalidSegments,
-              partitionsToPrune, isJobToClearDataMaps);
-    } catch (Exception e) {
-      throw new RuntimeException(e);
+  public static DataMapJob getEmbeddedJob() {
+    String className = "org.apache.carbondata.indexserver.EmbeddedDataMapJob";
+    DataMapJob dataMapJob = (DataMapJob) DataMapUtil.createDataMapJob(className);
+    if (dataMapJob == null) {
+      throw new ExceptionInInitializerError("Unable to create EmbeddedDataMapJob");
+    }
+    return dataMapJob;
+  }
+
+  public static List<ExtendedBlocklet> executeDataMapJob(CarbonTable carbonTable,
+      FilterResolverIntf resolver, Configuration configuration, DataMapJob dataMapJob,
+      List<PartitionSpec> partitionsToPrune, List<Segment> validSegments,
+      List<Segment> invalidSegments, DataMapLevel level) throws IOException {
+    return executeDataMapJob(carbonTable, resolver, configuration, dataMapJob,partitionsToPrune,
+        validSegments, invalidSegments, level, false);
+  }
+
+  /**
+   * Prune the segments from the already pruned blocklets.
+   * @param segments
+   * @param prunedBlocklets
+   */
+  public static void pruneSegments(List<Segment> segments,
+      List<ExtendedBlocklet> prunedBlocklets) {
+    List<Segment> toBeRemovedSegments = new ArrayList<>();
+    for (Segment segment : segments) {
+      boolean found = false;
+      // Clear the old pruned index files if any present
+      segment.getFilteredIndexShardNames().clear();
+      // Check the segment exist in any of the pruned blocklets.
+      for (ExtendedBlocklet blocklet : prunedBlocklets) {
+        if (blocklet.getSegment().toString().equals(segment.toString())) {
+          found = true;
+          // Set the pruned index file to the segment
+          // for further pruning.
+          String shardName = CarbonTablePath.getShardName(blocklet.getFilePath());
+          segment.setFilteredIndexShardName(shardName);
+        }
+      }
+      // Add to remove segments list if not present in pruned blocklets.
+      if (!found) {
+        toBeRemovedSegments.add(segment);
+      }
     }
+    // Remove all segments which are already pruned from pruned blocklets
+    segments.removeAll(toBeRemovedSegments);
+  }
+
+  static List<ExtendedBlocklet> pruneDataMaps(CarbonTable table,
+      FilterResolverIntf filterResolverIntf, List<Segment> segmentsToLoad,
+      List<PartitionSpec> partitions, List<ExtendedBlocklet> blocklets) throws IOException {
+    List<ExtendedBlocklet> cgDataMaps = pruneDataMaps(table, filterResolverIntf, segmentsToLoad,
+        partitions, blocklets,
+        DataMapLevel.CG);
+    return pruneDataMaps(table, filterResolverIntf, segmentsToLoad,
+        partitions, cgDataMaps,
+        DataMapLevel.FG);
+  }
+
+  static List<ExtendedBlocklet> pruneDataMaps(CarbonTable table,
+      FilterResolverIntf filterResolverIntf, List<Segment> segmentsToLoad,
+      List<PartitionSpec> partitions, List<ExtendedBlocklet> blocklets, DataMapLevel dataMapLevel)
+      throws IOException {
+    DataMapExprWrapper dataMapExprWrapper =
+        new DataMapChooser(table).chooseDataMap(dataMapLevel, filterResolverIntf);
+    if (dataMapExprWrapper != null) {
+      List<ExtendedBlocklet> extendedBlocklets = new ArrayList<>();
+      // Prune segments from already pruned blocklets
+      for (DataMapDistributableWrapper wrapper : dataMapExprWrapper
+          .toDistributable(segmentsToLoad)) {
+        TableDataMap defaultDataMap = DataMapStoreManager.getInstance()
+            .getDataMap(table, wrapper.getDistributable().getDataMapSchema());
+        List<DataMap> dataMaps = defaultDataMap.getTableDataMaps(wrapper.getDistributable());
+        List<ExtendedBlocklet> prunnedBlocklet = new ArrayList<>();
+        if (table.isTransactionalTable()) {
+          prunnedBlocklet.addAll(defaultDataMap.prune(dataMaps, wrapper.getDistributable(),
+              dataMapExprWrapper.getFilterResolverIntf(wrapper.getUniqueId()), partitions));
+        } else {
+          prunnedBlocklet
+              .addAll(defaultDataMap.prune(segmentsToLoad, filterResolverIntf, partitions));
+        }
+        for (ExtendedBlocklet blocklet : prunnedBlocklet) {
+          blocklet.getDetailInfo();
+          blocklet.setDataMapUniqueId(wrapper.getUniqueId());
+        }
+        extendedBlocklets.addAll(prunnedBlocklet);
+      }
+      return dataMapExprWrapper.pruneBlocklets(extendedBlocklets);
+    }
+    for (ExtendedBlocklet blocklet : blocklets) {
+      blocklet.getDetailInfo();
+    }
 
 Review comment:
   done

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[hidden email]


With regards,
Apache Git Services