Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2275#discussion_r189439736 --- Diff: datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapWriter.java --- @@ -84,20 +91,61 @@ public static final String ROWID_NAME = "rowId"; + private Map<LuceneColumnKeys, Map<Integer, RoaringBitmap>> cache = new HashMap<>(); + + private int cacheSize; + + private ByteBuffer intBuffer = ByteBuffer.allocate(4); + + private boolean storeBlockletWise; + LuceneDataMapWriter(String tablePath, String dataMapName, List<CarbonColumn> indexColumns, - Segment segment, String shardName, boolean isFineGrain) { + Segment segment, String shardName, boolean isFineGrain, int flushSize, + boolean storeBlockletWise) { super(tablePath, dataMapName, indexColumns, segment, shardName); this.isFineGrain = isFineGrain; --- End diff -- ok, removed --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2275#discussion_r189439752 --- Diff: datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java --- @@ -107,13 +138,39 @@ public LuceneDataMapFactoryBase(CarbonTable carbonTable, DataMapSchema dataMapSc // optimizedOperations.add(ExpressionType.LESSTHAN_EQUALTO); // optimizedOperations.add(ExpressionType.NOT); optimizedOperations.add(ExpressionType.TEXT_MATCH); - this.dataMapMeta = new DataMapMeta(indexedColumns, optimizedOperations); - + this.dataMapMeta = new DataMapMeta(indexedCarbonColumns, optimizedOperations); // get analyzer // TODO: how to get analyzer ? analyzer = new StandardAnalyzer(); } + public static int validateAndGetWriteCacheSize(DataMapSchema schema) { + String cacheStr = schema.getProperties().get(FLUSH_CACHE); + if (cacheStr == null) { + cacheStr = FLUSH_CACHE_DEFAULT_SIZE; + } + int cacheSize; + try { + cacheSize = Integer.parseInt(cacheStr); + } catch (NumberFormatException e) { + cacheSize = -1; + } + return cacheSize; + } + + public static boolean validateAndGetStoreBlockletWise(DataMapSchema schema) { + String splitBlockletStr = schema.getProperties().get(SPLIT_BLOCKLET); + if (splitBlockletStr == null) { + splitBlockletStr = SPLIT_BLOCKLET_DEFAULT; + } + boolean splitBlockletWise; + try { + splitBlockletWise = Boolean.parseBoolean(splitBlockletStr); + } catch (NumberFormatException e) { + splitBlockletWise = false; --- End diff -- ok --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2275#discussion_r189440558 --- Diff: datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapWriter.java --- @@ -175,52 +206,39 @@ public void onBlockletEnd(int blockletId) throws IOException { */ public void onPageAdded(int blockletId, int pageId, int pageSize, ColumnPage[] pages) throws IOException { + // save index data into ram, write into disk after one page finished + int columnsCount = pages.length; + if (columnsCount <= 0) { + LOGGER.warn("No data in the page " + pageId + "with blockletid " + blockletId + + " to write lucene datamap"); + return; + } for (int rowId = 0; rowId < pageSize; rowId++) { - // create a new document - Document doc = new Document(); - // add blocklet Id - doc.add(new IntPoint(BLOCKLETID_NAME, blockletId)); - doc.add(new StoredField(BLOCKLETID_NAME, blockletId)); - //doc.add(new NumericDocValuesField(BLOCKLETID_NAME,blockletId)); - - // add page id and row id in Fine Grain data map - if (isFineGrain) { - // add page Id - doc.add(new IntPoint(PAGEID_NAME, pageId)); - doc.add(new StoredField(PAGEID_NAME, pageId)); - //doc.add(new NumericDocValuesField(PAGEID_NAME,pageId)); - - // add row id - doc.add(new IntPoint(ROWID_NAME, rowId)); - doc.add(new StoredField(ROWID_NAME, rowId)); - //doc.add(new NumericDocValuesField(ROWID_NAME,rowId)); - } - // add indexed columns value into the document - List<CarbonColumn> indexColumns = getIndexColumns(); - for (int i = 0; i < pages.length; i++) { - // add to lucene only if value is not null - if (!pages[i].getNullBits().get(rowId)) { - addField(doc, pages[i].getData(rowId), indexColumns.get(i), Field.Store.NO); + LuceneColumnKeys columns = new LuceneColumnKeys(getIndexColumns().size()); + int i = 0; + for (ColumnPage page : pages) { + if (!page.getNullBits().get(rowId)) { + columns.colValues[i++] = getValue(page, rowId); } } - - // add this document - ramIndexWriter.addDocument(doc); + if (cacheSize > 0) { + addToCache(columns, rowId, pageId, blockletId, cache, intBuffer, storeBlockletWise); + } else { + addData(columns, rowId, pageId, blockletId, intBuffer, ramIndexWriter, getIndexColumns(), + storeBlockletWise); + } + } + if (cacheSize > 0) { + flushCacheIfPossible(); } - } - private boolean addField(Document doc, Object data, CarbonColumn column, Field.Store store) { + private static boolean addField(Document doc, Object key, String fieldName, Field.Store store) { --- End diff -- ok --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/2275 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5004/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2275 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/5981/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2275 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/4823/ --- |
In reply to this post by qiuchenjian-2
|
In reply to this post by qiuchenjian-2
|
Free forum by Nabble | Edit this page |