[ https://issues.apache.org/jira/browse/CARBONDATA-300?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15595102#comment-15595102 ] ASF GitHub Bot commented on CARBONDATA-300: ------------------------------------------- Github user ravipesala commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/244#discussion_r84473500 --- Diff: processing/src/main/java/org/apache/carbondata/processing/newflow/encoding/impl/RowEncoderImpl.java --- @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.carbondata.processing.newflow.encoding.impl; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import org.apache.carbondata.core.cache.Cache; +import org.apache.carbondata.core.cache.CacheProvider; +import org.apache.carbondata.core.cache.CacheType; +import org.apache.carbondata.core.cache.dictionary.Dictionary; +import org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier; +import org.apache.carbondata.core.constants.IgnoreDictionary; +import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory; +import org.apache.carbondata.processing.newflow.CarbonDataLoadConfiguration; +import org.apache.carbondata.processing.newflow.DataField; +import org.apache.carbondata.processing.newflow.constants.DataLoadProcessorConstants; +import org.apache.carbondata.processing.newflow.encoding.FieldEncoder; +import org.apache.carbondata.processing.newflow.encoding.RowEncoder; +import org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException; +import org.apache.carbondata.processing.newflow.row.CarbonRow; +import org.apache.carbondata.processing.util.RemoveDictionaryUtil; + +/** + * + */ +public class RowEncoderImpl implements RowEncoder { + + private CarbonDataLoadConfiguration configuration; + + private AbstractDictionaryFieldEncoderImpl[] dictionaryFieldEncoders; + + private NonDictionaryFieldEncoderImpl[] nonDictionaryFieldEncoders; + + private MeasureFieldEncoderImpl[] measureFieldEncoders; + + public RowEncoderImpl(DataField[] fields, CarbonDataLoadConfiguration configuration) { + this.configuration = configuration; + CacheProvider cacheProvider = CacheProvider.getInstance(); + Cache<DictionaryColumnUniqueIdentifier, Dictionary> cache = cacheProvider + .createCache(CacheType.REVERSE_DICTIONARY, + configuration.getTableIdentifier().getStorePath()); + List<AbstractDictionaryFieldEncoderImpl> dictFieldEncoders = new ArrayList<>(); + List<NonDictionaryFieldEncoderImpl> 
nonDictFieldEncoders = new ArrayList<>(); + List<MeasureFieldEncoderImpl> measureFieldEncoderList = new ArrayList<>(); + + long lruCacheStartTime = System.currentTimeMillis(); + + for (int i = 0; i < fields.length; i++) { + FieldEncoder fieldEncoder = FieldEncoderFactory.getInstance() + .createFieldEncoder(fields[i], cache, + configuration.getTableIdentifier().getCarbonTableIdentifier(), i); + if (fieldEncoder instanceof AbstractDictionaryFieldEncoderImpl) { + dictFieldEncoders.add((AbstractDictionaryFieldEncoderImpl) fieldEncoder); + } else if (fieldEncoder instanceof NonDictionaryFieldEncoderImpl) { + nonDictFieldEncoders.add((NonDictionaryFieldEncoderImpl) fieldEncoder); + } else if (fieldEncoder instanceof MeasureFieldEncoderImpl) { + measureFieldEncoderList.add((MeasureFieldEncoderImpl)fieldEncoder); + } + } + CarbonTimeStatisticsFactory.getLoadStatisticsInstance() + .recordLruCacheLoadTime((System.currentTimeMillis() - lruCacheStartTime) / 1000.0); + dictionaryFieldEncoders = + dictFieldEncoders.toArray(new AbstractDictionaryFieldEncoderImpl[dictFieldEncoders.size()]); + nonDictionaryFieldEncoders = nonDictFieldEncoders + .toArray(new NonDictionaryFieldEncoderImpl[nonDictFieldEncoders.size()]); + measureFieldEncoders = measureFieldEncoderList + .toArray(new MeasureFieldEncoderImpl[measureFieldEncoderList.size()]); + + } + + @Override public CarbonRow encode(CarbonRow row) throws CarbonDataLoadingException { --- End diff -- Ok. I will remove it from here and move it to the sort step, to keep sort preparation in one place. > 5. Add EncodeProcessorStep which encodes the data with dictionary. 
> ------------------------------------------------------------------ > > Key: CARBONDATA-300 > URL: https://issues.apache.org/jira/browse/CARBONDATA-300 > Project: CarbonData > Issue Type: Sub-task > Reporter: Ravindra Pesala > Assignee: Ravindra Pesala > Fix For: 0.2.0-incubating > > > Add EncodeProcessorStep, which encodes the data with a dictionary. This dictionary can be obtained from the dictionary interface. -- This message was sent by Atlassian JIRA (v6.3.4#6332) |
Free forum by Nabble | Edit this page |