Indhumathi27 commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r405973637 ########## File path: core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java ########## @@ -101,49 +100,29 @@ private DataMapStoreManager() { } /** - * It only gives the visible datamaps - */ - List<TableIndex> getAllVisibleIndexes(CarbonTable carbonTable) throws IOException { - CarbonSessionInfo sessionInfo = ThreadLocalSessionInfo.getCarbonSessionInfo(); - List<TableIndex> allDataMaps = getAllIndexes(carbonTable); - Iterator<TableIndex> dataMapIterator = allDataMaps.iterator(); - while (dataMapIterator.hasNext()) { - TableIndex dataMap = dataMapIterator.next(); - String dbName = carbonTable.getDatabaseName(); - String tableName = carbonTable.getTableName(); - String dmName = dataMap.getDataMapSchema().getDataMapName(); - // TODO: need support get the visible status of datamap without sessionInfo in the future - if (sessionInfo != null) { - boolean isDmVisible = sessionInfo.getSessionParams().getProperty( - String.format("%s%s.%s.%s", CarbonCommonConstants.CARBON_DATAMAP_VISIBLE, - dbName, tableName, dmName), "true").trim().equalsIgnoreCase("true"); - if (!isDmVisible) { - LOGGER.warn(String.format("Ignore invisible datamap %s on table %s.%s", - dmName, dbName, tableName)); - dataMapIterator.remove(); - } - } else { - String message = "Carbon session info is null"; - LOGGER.info(message); - } - } - return allDataMaps; - } - - /** - * It gives all indexes except the default index. + * It gives all indexes except the default index and secondary index. + * Collect's Coarse grain and Fine grain indexes on a table * * @return */ public List<TableIndex> getAllIndexes(CarbonTable carbonTable) throws IOException { - List<DataMapSchema> dataMapSchemas = getDataMapSchemasOfTable(carbonTable); + String indexMeta = carbonTable.getTableInfo().getFactTable().getTableProperties() + .get(carbonTable.getCarbonTableIdentifier().getTableId()); Review comment: Secondary index info is already stored in table with key as table-id. Used the same ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
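For readers following the thread, a minimal sketch of the lookup being described, using only the names that appear in the quoted diff (imports of the CarbonData classes are omitted):

```java
// Sketch only: the serialized index metadata is stored in the fact-table
// properties with the table id as the key, and is deserialized on read.
private static IndexMetadata readIndexMetadata(CarbonTable carbonTable) throws IOException {
  String serializedIndexMeta = carbonTable.getTableInfo().getFactTable().getTableProperties()
      .get(carbonTable.getCarbonTableIdentifier().getTableId());
  // may be null when no index has been created on this table yet
  return IndexMetadata.deserialize(serializedIndexMeta);
}
```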
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r405975524 ########## File path: core/src/main/java/org/apache/carbondata/core/datamap/dev/IndexFactory.java ########## @@ -187,7 +187,8 @@ public void deleteSegmentIndexData(String segmentNo) throws IOException { * 4. INDEX_COLUMNS should be exists in table columns */ public void validate() throws MalformedIndexCommandException { - List<CarbonColumn> indexColumns = carbonTable.getIndexedColumns(dataMapSchema); + List<CarbonColumn> indexColumns = + carbonTable.getIndexedColumns(dataMapSchema.getIndexColumns()); Review comment: The datamap schema should already contain validated columns. If columns are dropped, we can update the datamap schema at that point. Checking whether the columns in the datamap schema are still valid in every flow is not efficient. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r405976891 ########## File path: core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java ########## @@ -101,49 +100,29 @@ private DataMapStoreManager() { } /** - * It only gives the visible datamaps - */ - List<TableIndex> getAllVisibleIndexes(CarbonTable carbonTable) throws IOException { - CarbonSessionInfo sessionInfo = ThreadLocalSessionInfo.getCarbonSessionInfo(); - List<TableIndex> allDataMaps = getAllIndexes(carbonTable); - Iterator<TableIndex> dataMapIterator = allDataMaps.iterator(); - while (dataMapIterator.hasNext()) { - TableIndex dataMap = dataMapIterator.next(); - String dbName = carbonTable.getDatabaseName(); - String tableName = carbonTable.getTableName(); - String dmName = dataMap.getDataMapSchema().getDataMapName(); - // TODO: need support get the visible status of datamap without sessionInfo in the future - if (sessionInfo != null) { - boolean isDmVisible = sessionInfo.getSessionParams().getProperty( - String.format("%s%s.%s.%s", CarbonCommonConstants.CARBON_DATAMAP_VISIBLE, - dbName, tableName, dmName), "true").trim().equalsIgnoreCase("true"); - if (!isDmVisible) { - LOGGER.warn(String.format("Ignore invisible datamap %s on table %s.%s", - dmName, dbName, tableName)); - dataMapIterator.remove(); - } - } else { - String message = "Carbon session info is null"; - LOGGER.info(message); - } - } - return allDataMaps; - } - - /** - * It gives all indexes except the default index. + * It gives all indexes except the default index and secondary index. + * Collect's Coarse grain and Fine grain indexes on a table * * @return */ public List<TableIndex> getAllIndexes(CarbonTable carbonTable) throws IOException { - List<DataMapSchema> dataMapSchemas = getDataMapSchemasOfTable(carbonTable); + String indexMeta = carbonTable.getTableInfo().getFactTable().getTableProperties() + .get(carbonTable.getCarbonTableIdentifier().getTableId()); + IndexMetadata indexMetadata = IndexMetadata.deserialize(indexMeta); List<TableIndex> indexes = new ArrayList<>(); - if (dataMapSchemas != null) { - for (DataMapSchema dataMapSchema : dataMapSchemas) { - RelationIdentifier identifier = dataMapSchema.getParentTables().get(0); - if (dataMapSchema.isIndex() && identifier.getTableId() - .equals(carbonTable.getTableId())) { - indexes.add(getIndex(carbonTable, dataMapSchema)); + if (null != indexMetadata) { + // get bloom indexes and lucene indexes + for (Map.Entry<String, Map<String, Map<String, String>>> providerEntry : indexMetadata + .getIndexesMap().entrySet()) { + for (Map.Entry<String, Map<String, String>> indexEntry : providerEntry.getValue() + .entrySet()) { + if (!indexEntry.getValue().get(CarbonCommonConstants.INDEX_PROVIDER) Review comment: we are sending bloom and lucene to SI flow. But SI is still not like boom and lucene. So we are excluding here. This is causing multiple branches inside index again. we need to unify this ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
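Put differently, the exclusion in the new getAllIndexes body amounts to a predicate like the sketch below; the names come from the quoted diffs, and whether SI should keep being excluded here is exactly the open question:

```java
// Illustrative only, not the actual patch: an index entry is skipped from the
// CG/FG flow when its stored provider is the secondary-index provider.
private static boolean isSecondaryIndexEntry(Map<String, String> indexProperties) {
  String provider = indexProperties.get(CarbonCommonConstants.INDEX_PROVIDER);
  return CarbonIndexProvider.SI.getIndexProviderName().equalsIgnoreCase(provider);
}
```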
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r405977323 ########## File path: core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java ########## @@ -101,49 +100,29 @@ private DataMapStoreManager() { } /** - * It only gives the visible datamaps - */ - List<TableIndex> getAllVisibleIndexes(CarbonTable carbonTable) throws IOException { - CarbonSessionInfo sessionInfo = ThreadLocalSessionInfo.getCarbonSessionInfo(); - List<TableIndex> allDataMaps = getAllIndexes(carbonTable); - Iterator<TableIndex> dataMapIterator = allDataMaps.iterator(); - while (dataMapIterator.hasNext()) { - TableIndex dataMap = dataMapIterator.next(); - String dbName = carbonTable.getDatabaseName(); - String tableName = carbonTable.getTableName(); - String dmName = dataMap.getDataMapSchema().getDataMapName(); - // TODO: need support get the visible status of datamap without sessionInfo in the future - if (sessionInfo != null) { - boolean isDmVisible = sessionInfo.getSessionParams().getProperty( - String.format("%s%s.%s.%s", CarbonCommonConstants.CARBON_DATAMAP_VISIBLE, - dbName, tableName, dmName), "true").trim().equalsIgnoreCase("true"); - if (!isDmVisible) { - LOGGER.warn(String.format("Ignore invisible datamap %s on table %s.%s", - dmName, dbName, tableName)); - dataMapIterator.remove(); - } - } else { - String message = "Carbon session info is null"; - LOGGER.info(message); - } - } - return allDataMaps; - } - - /** - * It gives all indexes except the default index. + * It gives all indexes except the default index and secondary index. + * Collect's Coarse grain and Fine grain indexes on a table * * @return */ public List<TableIndex> getAllIndexes(CarbonTable carbonTable) throws IOException { Review comment: what is the point in calling this getAllIndex ?, if it is not giving all index (default and SI). may be call this get CGandFG index ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
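If the rename is taken up, only the name and javadoc would need to change; one possible shape (the new name is just a suggestion, and the body is assumed to stay as it is today):

```java
/**
 * Collects only the coarse-grain and fine-grain indexes (bloom, lucene) of a table.
 * The default index and secondary indexes are deliberately excluded.
 */
public List<TableIndex> getCGAndFGIndexes(CarbonTable carbonTable) throws IOException {
  // the body of the current getAllIndexes(...) would move here unchanged
  return getAllIndexes(carbonTable);
}
```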
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r405986131 ########## File path: core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java ########## @@ -1202,21 +1192,64 @@ public boolean isIndexTable() throws IOException { } } + public List<String> getIndexTableNames(String indexProvider) throws IOException { + deserializeIndexMetadata(); + if (null != indexMetadata) { + return indexMetadata.getIndexTables(indexProvider); + } else { + return new ArrayList<>(); + } + } + public String getIndexInfo() throws IOException { + return getIndexInfo(null); + } + + public IndexMetadata getIndexMetadata() throws IOException { + deserializeIndexMetadata(); + return indexMetadata; + } + + public String getIndexInfo(String indexProvider) throws IOException { deserializeIndexMetadata(); if (null != indexMetadata) { - IndexTableInfo[] indexTableInfos = - new IndexTableInfo[indexMetadata.getIndexesMap().entrySet().size()]; - int index = 0; - if (!isIndexTable()) { - for (Map.Entry<String, List<String>> entry : indexMetadata.getIndexesMap().entrySet()) { - indexTableInfos[index] = - new IndexTableInfo(getDatabaseName(), entry.getKey(), entry.getValue()); - index++; + if (null != indexProvider) { + if (null != indexMetadata.getIndexesMap().get(indexProvider)) { + IndexTableInfo[] indexTableInfos = + new IndexTableInfo[indexMetadata.getIndexesMap().get(indexProvider).entrySet() + .size()]; + int index = 0; + if (!isIndexTable()) { Review comment: why this check is required ? `if (!isIndexTable()) {` add some comments ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r405989456 ########## File path: integration/spark/src/main/scala/org/apache/carbondata/datamap/IndexRebuildRDD.scala ########## @@ -101,9 +103,11 @@ object IndexRebuildRDD { sparkSession, new RefreshResultImpl(), carbonTable.getTableInfo, + schema, schema.getDataMapName, indexedCarbonColumns.asScala.toArray, - segments2DmStorePath.keySet + segments2DmStorePath.keySet, + schema.getProviderName Review comment: Just passing the schema is enough; schema.getProviderName and schema.getDataMapName can be obtained from it inside. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
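A sketch of that simplification, assuming the receiving side (the method name below is only a placeholder) keeps the schema and derives the rest from it:

```java
// Hypothetical shape: pass the DataMapSchema once and derive the rest inside,
// instead of also passing schema.getDataMapName and schema.getProviderName.
void rebuildIndex(DataMapSchema schema) {
  String indexName = schema.getDataMapName();
  String providerName = schema.getProviderName();
  // use indexName / providerName where the extra constructor arguments were used before
}
```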
Indhumathi27 commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r405994284 ########## File path: core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java ########## @@ -101,49 +100,29 @@ private DataMapStoreManager() { } /** - * It only gives the visible datamaps - */ - List<TableIndex> getAllVisibleIndexes(CarbonTable carbonTable) throws IOException { - CarbonSessionInfo sessionInfo = ThreadLocalSessionInfo.getCarbonSessionInfo(); - List<TableIndex> allDataMaps = getAllIndexes(carbonTable); - Iterator<TableIndex> dataMapIterator = allDataMaps.iterator(); - while (dataMapIterator.hasNext()) { - TableIndex dataMap = dataMapIterator.next(); - String dbName = carbonTable.getDatabaseName(); - String tableName = carbonTable.getTableName(); - String dmName = dataMap.getDataMapSchema().getDataMapName(); - // TODO: need support get the visible status of datamap without sessionInfo in the future - if (sessionInfo != null) { - boolean isDmVisible = sessionInfo.getSessionParams().getProperty( - String.format("%s%s.%s.%s", CarbonCommonConstants.CARBON_DATAMAP_VISIBLE, - dbName, tableName, dmName), "true").trim().equalsIgnoreCase("true"); - if (!isDmVisible) { - LOGGER.warn(String.format("Ignore invisible datamap %s on table %s.%s", - dmName, dbName, tableName)); - dataMapIterator.remove(); - } - } else { - String message = "Carbon session info is null"; - LOGGER.info(message); - } - } - return allDataMaps; - } - - /** - * It gives all indexes except the default index. + * It gives all indexes except the default index and secondary index. + * Collect's Coarse grain and Fine grain indexes on a table * * @return */ public List<TableIndex> getAllIndexes(CarbonTable carbonTable) throws IOException { - List<DataMapSchema> dataMapSchemas = getDataMapSchemasOfTable(carbonTable); + String indexMeta = carbonTable.getTableInfo().getFactTable().getTableProperties() + .get(carbonTable.getCarbonTableIdentifier().getTableId()); + IndexMetadata indexMetadata = IndexMetadata.deserialize(indexMeta); List<TableIndex> indexes = new ArrayList<>(); - if (dataMapSchemas != null) { - for (DataMapSchema dataMapSchema : dataMapSchemas) { - RelationIdentifier identifier = dataMapSchema.getParentTables().get(0); - if (dataMapSchema.isIndex() && identifier.getTableId() - .equals(carbonTable.getTableId())) { - indexes.add(getIndex(carbonTable, dataMapSchema)); + if (null != indexMetadata) { + // get bloom indexes and lucene indexes + for (Map.Entry<String, Map<String, Map<String, String>>> providerEntry : indexMetadata + .getIndexesMap().entrySet()) { + for (Map.Entry<String, Map<String, String>> indexEntry : providerEntry.getValue() + .entrySet()) { + if (!indexEntry.getValue().get(CarbonCommonConstants.INDEX_PROVIDER) Review comment: In CarbonTable, we are adding indexInfo with Provider. In SI Flow, only indexes having provider name as "SI" will be sent. Still need to unify the flow? If yes, can you please give some inputs ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r405994595 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/index/CarbonCreateIndexCommand.scala ########## @@ -67,128 +82,195 @@ case class CarbonCreateIndexCommand( throw new MalformedCarbonCommandException("Unsupported operation on non transactional table") } - if (DataMapStoreManager.getInstance().isDataMapExist(parentTable.getTableId, indexName)) { - if (!ifNotExistsSet) { - throw new MalformedIndexCommandException( - s"Index with name ${ indexName } on table " + - s"${parentTable.getDatabaseName}.${parentTable.getTableName} already exists") - } else { - return Seq.empty - } + if (parentTable.isMVTable || parentTable.isIndexTable) { + throw new MalformedIndexCommandException( + "Cannot create index on child table `" + indexName + "`") } if (CarbonUtil.getFormatVersion(parentTable) != ColumnarFormatVersion.V3) { - throw new MalformedCarbonCommandException(s"Unsupported operation on table with " + - s"V1 or V2 format data") + throw new MalformedCarbonCommandException( + s"Unsupported operation on table with V1 or V2 format data") } - dataMapSchema = new DataMapSchema(indexName, indexProviderName) + // get metadata lock to avoid concurrent create index operations + val metadataLock = CarbonLockFactory.getCarbonLockObj( + parentTable.getAbsoluteTableIdentifier, + LockUsage.METADATA_LOCK) - val property = properties.map(x => (x._1.trim, x._2.trim)).asJava - val javaMap = new java.util.HashMap[String, String](property) - javaMap.put(DataMapProperty.DEFERRED_REBUILD, deferredRebuild.toString) - javaMap.put(CarbonCommonConstants.INDEX_COLUMNS, indexModel.columnNames.mkString(",")) - dataMapSchema.setProperties(javaMap) + try { + if (metadataLock.lockWithRetries()) { + LOGGER.info(s"Acquired the metadata lock for table $dbName.$parentTableName") + // get carbon table again to reflect any changes during lock acquire. + parentTable = + CarbonEnv.getInstance(sparkSession).carbonMetaStore + .lookupRelation(Some(dbName), parentTableName)(sparkSession) + .asInstanceOf[CarbonRelation].carbonTable + if (parentTable == null) { + throw new MalformedIndexCommandException(errMsg) + } + val oldIndexMetaData = parentTable.getIndexMetadata + // check whether the column has index created already + if (null != oldIndexMetaData) { + val indexExistsInCarbon = oldIndexMetaData.getIndexTables.asScala.contains(indexName) + if (indexExistsInCarbon) { + throw new MalformedIndexCommandException( + "Index with name `" + indexName + "` already exists on table `" + parentTableName + + "`") + } + } + // set properties + indexSchema.setProperties(indexProperties) + provider = new IndexProvider(parentTable, indexSchema, sparkSession) - if (dataMapSchema.isIndex && parentTable == null) { - throw new MalformedIndexCommandException( - "To create index, main table is required. Use `CREATE INDEX ... 
ON TABLE ...` ") - } - provider = new IndexProvider(parentTable, dataMapSchema, sparkSession) - if (deferredRebuild && !provider.supportRebuild()) { - throw new MalformedIndexCommandException( - s"DEFERRED REFRESH is not supported on this index $indexName" + - s" with provider ${dataMapSchema.getProviderName}") - } + if (deferredRebuild && !provider.supportRebuild()) { + throw new MalformedIndexCommandException( + "DEFERRED REFRESH is not supported on this index " + indexModel.indexName + + " with provider " + indexProviderName) + } else if (deferredRebuild && provider.supportRebuild()) { + indexProperties.put(CarbonCommonConstants.INDEX_STATUS, IndexStatus.DISABLED.name()) + } - if (parentTable.isMVTable) { - throw new MalformedIndexCommandException( - "Cannot create index on MV table " + parentTable.getTableUniqueName) - } + val isBloomFilter = CarbonIndexProvider.BLOOMFILTER.getIndexProviderName + .equalsIgnoreCase(indexProviderName) - if (parentTable.isIndexTable) { - throw new MalformedIndexCommandException( - "Cannot create index on Secondary Index table") - } + val existingIndexColumn4ThisProvider = Review comment: *for ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r405995246 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/index/CarbonCreateIndexCommand.scala ########## @@ -67,128 +82,195 @@ case class CarbonCreateIndexCommand( throw new MalformedCarbonCommandException("Unsupported operation on non transactional table") } - if (DataMapStoreManager.getInstance().isDataMapExist(parentTable.getTableId, indexName)) { - if (!ifNotExistsSet) { - throw new MalformedIndexCommandException( - s"Index with name ${ indexName } on table " + - s"${parentTable.getDatabaseName}.${parentTable.getTableName} already exists") - } else { - return Seq.empty - } + if (parentTable.isMVTable || parentTable.isIndexTable) { + throw new MalformedIndexCommandException( + "Cannot create index on child table `" + indexName + "`") } if (CarbonUtil.getFormatVersion(parentTable) != ColumnarFormatVersion.V3) { - throw new MalformedCarbonCommandException(s"Unsupported operation on table with " + - s"V1 or V2 format data") + throw new MalformedCarbonCommandException( + s"Unsupported operation on table with V1 or V2 format data") } - dataMapSchema = new DataMapSchema(indexName, indexProviderName) + // get metadata lock to avoid concurrent create index operations + val metadataLock = CarbonLockFactory.getCarbonLockObj( + parentTable.getAbsoluteTableIdentifier, + LockUsage.METADATA_LOCK) - val property = properties.map(x => (x._1.trim, x._2.trim)).asJava - val javaMap = new java.util.HashMap[String, String](property) - javaMap.put(DataMapProperty.DEFERRED_REBUILD, deferredRebuild.toString) - javaMap.put(CarbonCommonConstants.INDEX_COLUMNS, indexModel.columnNames.mkString(",")) - dataMapSchema.setProperties(javaMap) + try { + if (metadataLock.lockWithRetries()) { + LOGGER.info(s"Acquired the metadata lock for table $dbName.$parentTableName") + // get carbon table again to reflect any changes during lock acquire. + parentTable = + CarbonEnv.getInstance(sparkSession).carbonMetaStore + .lookupRelation(Some(dbName), parentTableName)(sparkSession) + .asInstanceOf[CarbonRelation].carbonTable + if (parentTable == null) { + throw new MalformedIndexCommandException(errMsg) + } + val oldIndexMetaData = parentTable.getIndexMetadata + // check whether the column has index created already + if (null != oldIndexMetaData) { + val indexExistsInCarbon = oldIndexMetaData.getIndexTables.asScala.contains(indexName) + if (indexExistsInCarbon) { + throw new MalformedIndexCommandException( + "Index with name `" + indexName + "` already exists on table `" + parentTableName + + "`") + } + } + // set properties + indexSchema.setProperties(indexProperties) + provider = new IndexProvider(parentTable, indexSchema, sparkSession) - if (dataMapSchema.isIndex && parentTable == null) { - throw new MalformedIndexCommandException( - "To create index, main table is required. Use `CREATE INDEX ... 
ON TABLE ...` ") - } - provider = new IndexProvider(parentTable, dataMapSchema, sparkSession) - if (deferredRebuild && !provider.supportRebuild()) { - throw new MalformedIndexCommandException( - s"DEFERRED REFRESH is not supported on this index $indexName" + - s" with provider ${dataMapSchema.getProviderName}") - } + if (deferredRebuild && !provider.supportRebuild()) { + throw new MalformedIndexCommandException( + "DEFERRED REFRESH is not supported on this index " + indexModel.indexName + + " with provider " + indexProviderName) + } else if (deferredRebuild && provider.supportRebuild()) { + indexProperties.put(CarbonCommonConstants.INDEX_STATUS, IndexStatus.DISABLED.name()) + } - if (parentTable.isMVTable) { - throw new MalformedIndexCommandException( - "Cannot create index on MV table " + parentTable.getTableUniqueName) - } + val isBloomFilter = CarbonIndexProvider.BLOOMFILTER.getIndexProviderName Review comment: keep definition and usage together. move to line 152 ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r405998756 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/index/CarbonRefreshIndexCommand.scala ########## @@ -71,30 +80,81 @@ case class CarbonRefreshIndexCommand( private def refreshIndex( sparkSession: SparkSession, parentTable: CarbonTable, - indexOp: Optional[DataMapSchema]): Unit = { - val schema = indexOp.get + indexMetaData: IndexMetadata): Unit = { + var indexInfo: util.Map[String, String] = new util.HashMap[String, String]() + val allIndexesIterator = indexMetaData.getIndexesMap.entrySet().iterator() + breakable { + while (allIndexesIterator.hasNext) { + val currentIndex = allIndexesIterator.next() + if (!currentIndex.getKey.equalsIgnoreCase(CarbonIndexProvider.SI.getIndexProviderName)) { Review comment: If we are excluding SI everywhere, It is better to have separate map for each provider in `IndexMetadata`. access only Non SI maps. or you can directly lookup non SI providers ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
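The direct-lookup alternative could look roughly like the sketch below; it relies only on the existing provider-keyed map (the variable and enum names come from the quoted diffs, and the lucene provider key is assumed to be handled the same way):

```java
// Sketch of a direct per-provider lookup instead of iterating every provider
// and excluding SI. getIndexesMap() maps provider name -> (index name -> properties).
Map<String, Map<String, String>> bloomIndexes =
    indexMetaData.getIndexesMap().get(CarbonIndexProvider.BLOOMFILTER.getIndexProviderName());
// a lucene lookup would use the corresponding provider key in the same way
```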
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r405999033 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/index/CarbonRefreshIndexCommand.scala ########## @@ -71,30 +80,81 @@ case class CarbonRefreshIndexCommand( private def refreshIndex( sparkSession: SparkSession, parentTable: CarbonTable, - indexOp: Optional[DataMapSchema]): Unit = { - val schema = indexOp.get + indexMetaData: IndexMetadata): Unit = { + var indexInfo: util.Map[String, String] = new util.HashMap[String, String]() + val allIndexesIterator = indexMetaData.getIndexesMap.entrySet().iterator() + breakable { + while (allIndexesIterator.hasNext) { + val currentIndex = allIndexesIterator.next() + if (!currentIndex.getKey.equalsIgnoreCase(CarbonIndexProvider.SI.getIndexProviderName)) { + val indexIterator = currentIndex.getValue.entrySet().iterator() + while (indexIterator.hasNext) { + val indexEntry = indexIterator.next() + if (indexEntry.getKey.equalsIgnoreCase(indexName)) { Review comment: it's map. why are you looping every element. Just check containsKey() no need of iterator here ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
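As a minimal sketch of that suggestion (assuming index names are stored with a consistent case, since the original loop compared with equalsIgnoreCase while containsKey is case-sensitive):

```java
// Direct lookup in the provider's (index name -> properties) map,
// replacing the inner iterator over every entry.
Map<String, Map<String, String>> indexesOfProvider = currentIndex.getValue();
if (indexesOfProvider.containsKey(indexName)) {
  indexInfo = indexesOfProvider.get(indexName);
}
```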
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r405999660 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/index/CarbonRefreshIndexCommand.scala ########## @@ -71,30 +80,81 @@ case class CarbonRefreshIndexCommand( private def refreshIndex( sparkSession: SparkSession, parentTable: CarbonTable, - indexOp: Optional[DataMapSchema]): Unit = { - val schema = indexOp.get + indexMetaData: IndexMetadata): Unit = { + var indexInfo: util.Map[String, String] = new util.HashMap[String, String]() + val allIndexesIterator = indexMetaData.getIndexesMap.entrySet().iterator() + breakable { + while (allIndexesIterator.hasNext) { + val currentIndex = allIndexesIterator.next() + if (!currentIndex.getKey.equalsIgnoreCase(CarbonIndexProvider.SI.getIndexProviderName)) { + val indexIterator = currentIndex.getValue.entrySet().iterator() + while (indexIterator.hasNext) { + val indexEntry = indexIterator.next() + if (indexEntry.getKey.equalsIgnoreCase(indexName)) { + indexInfo = indexEntry.getValue + break() + } + } + } + } + } + if (indexInfo.isEmpty) { + throw new MalformedIndexCommandException( + "Index with name `" + indexName + "` is not present" + + "on table `" + parentTable.getTableName + "`") + } + val indexProviderName = indexInfo.get(CarbonCommonConstants.INDEX_PROVIDER) + val schema = new DataMapSchema(indexName, indexProviderName) + schema.setProperties(indexInfo) if (!schema.isLazy) { throw new MalformedIndexCommandException( s"Non-lazy index $indexName does not support manual refresh") } val provider = DataMapManager.get().getDataMapProvider(parentTable, schema, sparkSession) provider.rebuild() + // enable bloom or lucene index + // get metadata lock to avoid concurrent create index operations + val metadataLock = CarbonLockFactory.getCarbonLockObj( + parentTable.getAbsoluteTableIdentifier, + LockUsage.METADATA_LOCK) + try { + if (metadataLock.lockWithRetries()) { + LOGGER.info(s"Acquired the metadata lock for table " + + s"${ parentTable.getDatabaseName}.${ parentTable.getTableName }") + val oldIndexInfo = parentTable.getIndexInfo + val updatedIndexInfo = IndexTableInfo.enableIndex(oldIndexInfo, indexName) + + // set index information in parent table + val parentIndexMetadata = + IndexMetadata.deserialize(parentTable.getTableInfo.getFactTable.getTableProperties Review comment: we already deserialize and store it in parent carbon table indexMeta right ? why not directly use it ? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
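The reuse being suggested, sketched against the getIndexMetadata() accessor this PR adds to CarbonTable (quoted in an earlier diff):

```java
// Reuse the metadata that CarbonTable has already deserialized,
// instead of deserializing the raw table property again at this call site.
IndexMetadata parentIndexMetadata = parentTable.getIndexMetadata();
if (parentIndexMetadata != null) {
  // update and re-serialize the index status from here
}
```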
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r406002402 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/index/ShowIndexesCommand.scala ########## @@ -54,69 +62,87 @@ case class ShowIndexesCommand( override def processData(sparkSession: SparkSession): Seq[Row] = { val carbonTable = CarbonEnv.getCarbonTable(dbNameOp, tableName)(sparkSession) setAuditTable(carbonTable) - getFileIndexInfo(carbonTable) ++ getSIInfo(sparkSession, carbonTable) - } - - // get info for 'index datamap' - private def getFileIndexInfo(carbonTable: CarbonTable): Seq[Row] = { - val indexes = DataMapStoreManager.getInstance().getDataMapSchemasOfTable(carbonTable).asScala - if (indexes != null && indexes.nonEmpty) { - indexes.map { index => - Row( - index.getDataMapName, - index.getProviderName, - index.getIndexColumns.mkString(","), - index.getPropertiesAsString, - index.getStatus.name(), - index.getSyncStatus - ) - } - } else { - Seq.empty - } + getIndexInfo(sparkSession, carbonTable) } - // get info for SI - private def getSIInfo(sparkSession: SparkSession, carbonTable: CarbonTable): Seq[Row] = { + private def getIndexInfo(sparkSession: SparkSession, carbonTable: CarbonTable): Seq[Row] = { CarbonInternalMetastore.refreshIndexInfo( carbonTable.getDatabaseName, tableName, carbonTable)(sparkSession) - val indexesMap = CarbonInternalScalaUtil.getIndexesMap(carbonTable) - if (null == indexesMap) { - throw new Exception("Secondary index information is not loaded in main table") - } - val indexTableMap = indexesMap.asScala - if (indexTableMap.nonEmpty) { - val indexList = indexTableMap.map { indexInfo => - try { - val isSITableEnabled = sparkSession.sessionState.catalog - .getTableMetadata(TableIdentifier(indexInfo._1, dbNameOp)).storage.properties - .getOrElse("isSITableEnabled", "true").equalsIgnoreCase("true") - if (isSITableEnabled) { - (indexInfo._1, indexInfo._2.asScala.mkString(","), "enabled") - } else { - (indexInfo._1, indexInfo._2.asScala.mkString(","), "disabled") + val indexesMap = CarbonIndexUtil.getIndexesMap(carbonTable) + if (null != indexesMap) { + val indexTableMap = indexesMap.asScala + if (indexTableMap.nonEmpty) { + val secondaryIndex = indexTableMap.get(CarbonIndexProvider.SI.getIndexProviderName) + var finalIndexList: Seq[(String, String, String, String, String, String)] = Seq.empty + + if (secondaryIndex.isDefined && null != secondaryIndex.get) { + val siIterator = secondaryIndex.get.entrySet().iterator() + while (siIterator.hasNext) { + val indexInfo = siIterator.next() + try { + val isSITableEnabled = sparkSession.sessionState.catalog + .getTableMetadata(TableIdentifier(indexInfo.getKey, dbNameOp)).storage.properties + .getOrElse("isSITableEnabled", "true").equalsIgnoreCase("true") + if (isSITableEnabled) { + finalIndexList = finalIndexList :+ + (indexInfo.getKey, "carbondata", indexInfo.getValue + .get(CarbonCommonConstants.INDEX_COLUMNS), "NA", "enabled", "NA") + } else { + finalIndexList = finalIndexList :+ + (indexInfo.getKey, "carbondata", indexInfo.getValue + .get(CarbonCommonConstants + .INDEX_COLUMNS), "NA", "disabled", "NA") + } + } catch { + case ex: Exception => + LOGGER.error(s"Access storage properties from hive failed for index table: ${ + indexInfo.getKey + }") + finalIndexList = finalIndexList :+ + (indexInfo.getKey, "carbondata", indexInfo.getValue + .get(CarbonCommonConstants.INDEX_COLUMNS), "NA", "UNKNOWN", "NA") + } + } + } + + indexesMap.asScala + .filter(map => 
!map._1.equalsIgnoreCase(CarbonIndexProvider.SI.getIndexProviderName)) Review comment: As mentioned above instead of 1 map, if we keep 3 map (each for it's index provider). No need filter logic ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r406010654 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/hive/CarbonInternalMetastore.scala ########## @@ -136,14 +137,16 @@ object CarbonInternalMetastore { def refreshIndexInfo(dbName: String, tableName: String, carbonTable: CarbonTable, needLock: Boolean = true)(sparkSession: SparkSession): Unit = { - val indexTableExists = CarbonInternalScalaUtil.isIndexTableExists(carbonTable) + val indexTableExists = CarbonIndexUtil.isIndexTableExists(carbonTable) Review comment: This is confusing; can you add a comment? An index is not an index table, right? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r406012114 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/hive/CarbonInternalMetastore.scala ########## @@ -192,6 +226,42 @@ object CarbonInternalMetastore { LOGGER.error(e.getMessage) } } + if (null != indexExists) { + if (null != carbonTable && (null == indexExists || indexExists.toBoolean)) { Review comment: indexExists is already non-null when control reaches this line, so the inner `null == indexExists` check is redundant. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
Indhumathi27 commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r406015062 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/index/ShowIndexesCommand.scala ########## @@ -54,69 +62,87 @@ case class ShowIndexesCommand( override def processData(sparkSession: SparkSession): Seq[Row] = { val carbonTable = CarbonEnv.getCarbonTable(dbNameOp, tableName)(sparkSession) setAuditTable(carbonTable) - getFileIndexInfo(carbonTable) ++ getSIInfo(sparkSession, carbonTable) - } - - // get info for 'index datamap' - private def getFileIndexInfo(carbonTable: CarbonTable): Seq[Row] = { - val indexes = DataMapStoreManager.getInstance().getDataMapSchemasOfTable(carbonTable).asScala - if (indexes != null && indexes.nonEmpty) { - indexes.map { index => - Row( - index.getDataMapName, - index.getProviderName, - index.getIndexColumns.mkString(","), - index.getPropertiesAsString, - index.getStatus.name(), - index.getSyncStatus - ) - } - } else { - Seq.empty - } + getIndexInfo(sparkSession, carbonTable) } - // get info for SI - private def getSIInfo(sparkSession: SparkSession, carbonTable: CarbonTable): Seq[Row] = { + private def getIndexInfo(sparkSession: SparkSession, carbonTable: CarbonTable): Seq[Row] = { CarbonInternalMetastore.refreshIndexInfo( carbonTable.getDatabaseName, tableName, carbonTable)(sparkSession) - val indexesMap = CarbonInternalScalaUtil.getIndexesMap(carbonTable) - if (null == indexesMap) { - throw new Exception("Secondary index information is not loaded in main table") - } - val indexTableMap = indexesMap.asScala - if (indexTableMap.nonEmpty) { - val indexList = indexTableMap.map { indexInfo => - try { - val isSITableEnabled = sparkSession.sessionState.catalog - .getTableMetadata(TableIdentifier(indexInfo._1, dbNameOp)).storage.properties - .getOrElse("isSITableEnabled", "true").equalsIgnoreCase("true") - if (isSITableEnabled) { - (indexInfo._1, indexInfo._2.asScala.mkString(","), "enabled") - } else { - (indexInfo._1, indexInfo._2.asScala.mkString(","), "disabled") + val indexesMap = CarbonIndexUtil.getIndexesMap(carbonTable) + if (null != indexesMap) { + val indexTableMap = indexesMap.asScala + if (indexTableMap.nonEmpty) { + val secondaryIndex = indexTableMap.get(CarbonIndexProvider.SI.getIndexProviderName) + var finalIndexList: Seq[(String, String, String, String, String, String)] = Seq.empty + + if (secondaryIndex.isDefined && null != secondaryIndex.get) { + val siIterator = secondaryIndex.get.entrySet().iterator() + while (siIterator.hasNext) { + val indexInfo = siIterator.next() + try { + val isSITableEnabled = sparkSession.sessionState.catalog + .getTableMetadata(TableIdentifier(indexInfo.getKey, dbNameOp)).storage.properties + .getOrElse("isSITableEnabled", "true").equalsIgnoreCase("true") + if (isSITableEnabled) { + finalIndexList = finalIndexList :+ + (indexInfo.getKey, "carbondata", indexInfo.getValue + .get(CarbonCommonConstants.INDEX_COLUMNS), "NA", "enabled", "NA") + } else { + finalIndexList = finalIndexList :+ + (indexInfo.getKey, "carbondata", indexInfo.getValue + .get(CarbonCommonConstants + .INDEX_COLUMNS), "NA", "disabled", "NA") + } + } catch { + case ex: Exception => + LOGGER.error(s"Access storage properties from hive failed for index table: ${ + indexInfo.getKey + }") + finalIndexList = finalIndexList :+ + (indexInfo.getKey, "carbondata", indexInfo.getValue + .get(CarbonCommonConstants.INDEX_COLUMNS), "NA", "UNKNOWN", "NA") + } + } + } + + indexesMap.asScala + .filter(map => 
!map._1.equalsIgnoreCase(CarbonIndexProvider.SI.getIndexProviderName)) Review comment: Instead of 1 Map, if we keep 3 Map, it will occupy more memory right ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |
ajantha-bhat commented on a change in pull request #3688: [CARBONDATA-3765] Refactor Index Metadata for CG and FG Indexes
URL: https://github.com/apache/carbondata/pull/3688#discussion_r406054448 ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/events/SILoadEventListenerForFailedSegments.scala ########## @@ -67,10 +67,11 @@ class SILoadEventListenerForFailedSegments extends OperationEventListener with L .deserialize(carbonTable.getTableInfo.getFactTable.getTableProperties .get(carbonTable.getCarbonTableIdentifier.getTableId)) val mainTableDetails = SegmentStatusManager.readLoadMetadata(carbonTable.getMetadataPath) + val providerName = CarbonIndexProvider.SI.getIndexProviderName Review comment: ```suggestion val secondaryIndexProvider = CarbonIndexProvider.SI.getIndexProviderName ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [hidden email] With regards, Apache Git Services |