[GitHub] carbondata pull request #2847: [WIP]Support Gzip as column compressor

classic Classic list List threaded Threaded
64 messages Options
1234
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1634/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1845/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Failed  with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9894/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1641/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9901/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1852/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r238971644
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,203 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +import java.nio.DoubleBuffer;
    +import java.nio.FloatBuffer;
    +import java.nio.IntBuffer;
    +import java.nio.LongBuffer;
    +import java.nio.ShortBuffer;
    +
    +import org.apache.carbondata.core.util.ByteUtil;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +public class GzipCompressor implements Compressor {
    +
    +  public GzipCompressor() {
    +  }
    +
    +  @Override public String getName() {
    +    return "gzip";
    +  }
    +
    +  /*
    +   * Method called for compressing the data and
    +   * return a byte array
    +   */
    +  private byte[] compressData(byte[] data) {
    +
    +    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorOutputStream gzipCompressorOutputStream =
    +          new GzipCompressorOutputStream(byteArrayOutputStream);
    +      try {
    +        gzipCompressorOutputStream.write(data);
    +      } catch (IOException e) {
    +        throw new RuntimeException("Error during Compression step " + e.getMessage());
    +      } finally {
    +        gzipCompressorOutputStream.close();
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Compression step " + e.getMessage());
    +    }
    +
    +    return byteArrayOutputStream.toByteArray();
    +  }
    +
    +  /*
    +   * Method called for decompressing the data and
    +   * return a byte array
    +   */
    +  private byte[] decompressData(byte[] data) {
    +
    +    ByteArrayInputStream byteArrayOutputStream = new ByteArrayInputStream(data);
    +    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
    +
    --- End diff --
   
    done!



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r238971669
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,203 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +import java.nio.DoubleBuffer;
    +import java.nio.FloatBuffer;
    +import java.nio.IntBuffer;
    +import java.nio.LongBuffer;
    +import java.nio.ShortBuffer;
    +
    +import org.apache.carbondata.core.util.ByteUtil;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +public class GzipCompressor implements Compressor {
    +
    +  public GzipCompressor() {
    +  }
    +
    +  @Override public String getName() {
    +    return "gzip";
    +  }
    +
    +  /*
    +   * Method called for compressing the data and
    --- End diff --
   
    done!



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1645/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9905/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1856/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240097367
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,138 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +/**
    + * Codec Class for performing Gzip Compression
    + */
    +public class GzipCompressor extends AbstractCompressor {
    +
    +  public GzipCompressor() {
    --- End diff --
   
    why this empty constructor is required ??


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240098067
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/CompressorFactory.java ---
    @@ -35,8 +35,8 @@
       private final Map<String, Compressor> allSupportedCompressors = new HashMap<>();
     
       public enum NativeSupportedCompressor {
    -    SNAPPY("snappy", SnappyCompressor.class),
    -    ZSTD("zstd", ZstdCompressor.class);
    +    SNAPPY("snappy", SnappyCompressor.class), ZSTD("zstd", ZstdCompressor.class), GZIP("gzip",
    --- End diff --
   
    Move each compressor to new line


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240098824
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,138 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +/**
    + * Codec Class for performing Gzip Compression
    + */
    +public class GzipCompressor extends AbstractCompressor {
    +
    +  public GzipCompressor() {
    +  }
    +
    +  @Override public String getName() {
    +    return "gzip";
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Compresses in gzip format
    +   *
    +   * @param data Data Byte Array passed for compression
    +   * @return Compressed Byte Array
    +   */
    +  private byte[] compressData(byte[] data) {
    +    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorOutputStream gzipCompressorOutputStream =
    +          new GzipCompressorOutputStream(byteArrayOutputStream);
    +      try {
    +        /**
    +         * Below api will write bytes from specified byte array to the gzipCompressorOutputStream
    +         * The output stream will compress the given byte array.
    +         */
    +        gzipCompressorOutputStream.write(data);
    +      } catch (IOException e) {
    +        throw new RuntimeException("Error during Compression step " + e.getMessage());
    +      } finally {
    +        gzipCompressorOutputStream.close();
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Compression step " + e.getMessage());
    +    }
    +    return byteArrayOutputStream.toByteArray();
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Deompresses in gzip format
    +   *
    +   * @param data   Data Byte Array for Compression
    +   * @param offset Start value of Data Byte Array
    +   * @param length Size of Byte Array
    +   * @return
    +   */
    +  private byte[] decompressData(byte[] data, int offset, int length) {
    +    ByteArrayInputStream byteArrayOutputStream = new ByteArrayInputStream(data, offset, length);
    +    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorInputStream gzipCompressorInputStream =
    +          new GzipCompressorInputStream(byteArrayOutputStream);
    +      byte[] buffer = new byte[1024];
    +      int len;
    +      /**
    +       * Reads the next byte of the data from the input stream and stores them into buffer
    +       * Data is then read from the buffer and put into byteOutputStream from a offset.
    +       */
    +      while ((len = gzipCompressorInputStream.read(buffer)) != -1) {
    +        byteOutputStream.write(buffer, 0, len);
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Decompression step " + e.getMessage());
    +    }
    +    return byteOutputStream.toByteArray();
    +  }
    +
    +  @Override public byte[] compressByte(byte[] unCompInput) {
    +    return compressData(unCompInput);
    +  }
    +
    +  @Override public byte[] compressByte(byte[] unCompInput, int byteSize) {
    +    return compressData(unCompInput);
    +  }
    +
    +  @Override public byte[] unCompressByte(byte[] compInput) {
    +    return decompressData(compInput, 0, compInput.length);
    +  }
    +
    +  @Override public byte[] unCompressByte(byte[] compInput, int offset, int length) {
    +    return decompressData(compInput, offset, length);
    +  }
    +
    +  @Override public long rawUncompress(byte[] input, byte[] output) {
    +    //gzip api doesnt have rawUncompress yet.
    +    throw new RuntimeException("Not implemented rawUcompress for gzip yet");
    +  }
    +
    +  @Override public long maxCompressedLength(long inputSize) {
    +    if (inputSize < Integer.MAX_VALUE) {
    +      return inputSize;
    +    } else {
    +      throw new RuntimeException("compress input oversize for gzip");
    +    }
    +  }
    +
    +  @Override public boolean supportUnsafe() {
    --- End diff --
   
    Please move this default implementation to AbstractCompressor and override it only in the SnappyCompressor class; remove this implementation from the other classes


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240100410
 
    --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataWithCompression.scala ---
    @@ -168,6 +168,7 @@ class TestLoadDataWithCompression extends QueryTest with BeforeAndAfterEach with
       private val tableName = "load_test_with_compressor"
       private var executorService: ExecutorService = _
       private val csvDataDir = s"$integrationPath/spark2/target/csv_load_compression"
    +  private val compressors = Array("snappy","zstd","gzip")
    --- End diff --
   
    Please don't remove any test case add new testcase for Zstd


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240100527
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,138 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +/**
    + * Codec Class for performing Gzip Compression
    + */
    +public class GzipCompressor extends AbstractCompressor {
    +
    +  public GzipCompressor() {
    +  }
    +
    +  @Override public String getName() {
    +    return "gzip";
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Compresses in gzip format
    +   *
    +   * @param data Data Byte Array passed for compression
    +   * @return Compressed Byte Array
    +   */
    +  private byte[] compressData(byte[] data) {
    +    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorOutputStream gzipCompressorOutputStream =
    +          new GzipCompressorOutputStream(byteArrayOutputStream);
    +      try {
    +        /**
    +         * Below api will write bytes from specified byte array to the gzipCompressorOutputStream
    +         * The output stream will compress the given byte array.
    +         */
    +        gzipCompressorOutputStream.write(data);
    +      } catch (IOException e) {
    +        throw new RuntimeException("Error during Compression step " + e.getMessage());
    +      } finally {
    +        gzipCompressorOutputStream.close();
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Compression step " + e.getMessage());
    +    }
    +    return byteArrayOutputStream.toByteArray();
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Deompresses in gzip format
    +   *
    +   * @param data   Data Byte Array for Compression
    +   * @param offset Start value of Data Byte Array
    +   * @param length Size of Byte Array
    +   * @return
    +   */
    +  private byte[] decompressData(byte[] data, int offset, int length) {
    +    ByteArrayInputStream byteArrayOutputStream = new ByteArrayInputStream(data, offset, length);
    +    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorInputStream gzipCompressorInputStream =
    +          new GzipCompressorInputStream(byteArrayOutputStream);
    +      byte[] buffer = new byte[1024];
    +      int len;
    +      /**
    +       * Reads the next byte of the data from the input stream and stores them into buffer
    +       * Data is then read from the buffer and put into byteOutputStream from a offset.
    +       */
    +      while ((len = gzipCompressorInputStream.read(buffer)) != -1) {
    +        byteOutputStream.write(buffer, 0, len);
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Decompression step " + e.getMessage());
    +    }
    +    return byteOutputStream.toByteArray();
    +  }
    +
    +  @Override public byte[] compressByte(byte[] unCompInput) {
    +    return compressData(unCompInput);
    +  }
    +
    +  @Override public byte[] compressByte(byte[] unCompInput, int byteSize) {
    +    return compressData(unCompInput);
    +  }
    +
    +  @Override public byte[] unCompressByte(byte[] compInput) {
    +    return decompressData(compInput, 0, compInput.length);
    +  }
    +
    +  @Override public byte[] unCompressByte(byte[] compInput, int offset, int length) {
    +    return decompressData(compInput, offset, length);
    +  }
    +
    +  @Override public long rawUncompress(byte[] input, byte[] output) {
    +    //gzip api doesnt have rawUncompress yet.
    +    throw new RuntimeException("Not implemented rawUcompress for gzip yet");
    +  }
    +
    +  @Override public long maxCompressedLength(long inputSize) {
    +    if (inputSize < Integer.MAX_VALUE) {
    --- End diff --
   
    Please add some comments for this piece of code


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240102212
 
    --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataWithCompression.scala ---
    @@ -168,6 +168,7 @@ class TestLoadDataWithCompression extends QueryTest with BeforeAndAfterEach with
       private val tableName = "load_test_with_compressor"
       private var executorService: ExecutorService = _
       private val csvDataDir = s"$integrationPath/spark2/target/csv_load_compression"
    +  private val compressors = Array("snappy","zstd","gzip")
    --- End diff --
   
    No test cases were removed. Only the test case name was changed from "test with snappy and offheap" to "test different compressors and offheap".


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240130373
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,138 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +/**
    + * Codec Class for performing Gzip Compression
    + */
    +public class GzipCompressor extends AbstractCompressor {
    +
    +  public GzipCompressor() {
    --- End diff --
   
    Removed.


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240130469
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,138 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +/**
    + * Codec Class for performing Gzip Compression
    + */
    +public class GzipCompressor extends AbstractCompressor {
    +
    +  public GzipCompressor() {
    +  }
    +
    +  @Override public String getName() {
    +    return "gzip";
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Compresses in gzip format
    +   *
    +   * @param data Data Byte Array passed for compression
    +   * @return Compressed Byte Array
    +   */
    +  private byte[] compressData(byte[] data) {
    +    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorOutputStream gzipCompressorOutputStream =
    +          new GzipCompressorOutputStream(byteArrayOutputStream);
    +      try {
    +        /**
    +         * Below api will write bytes from specified byte array to the gzipCompressorOutputStream
    +         * The output stream will compress the given byte array.
    +         */
    +        gzipCompressorOutputStream.write(data);
    +      } catch (IOException e) {
    +        throw new RuntimeException("Error during Compression step " + e.getMessage());
    +      } finally {
    +        gzipCompressorOutputStream.close();
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Compression step " + e.getMessage());
    +    }
    +    return byteArrayOutputStream.toByteArray();
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Deompresses in gzip format
    +   *
    +   * @param data   Data Byte Array for Compression
    +   * @param offset Start value of Data Byte Array
    +   * @param length Size of Byte Array
    +   * @return
    +   */
    +  private byte[] decompressData(byte[] data, int offset, int length) {
    +    ByteArrayInputStream byteArrayOutputStream = new ByteArrayInputStream(data, offset, length);
    +    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorInputStream gzipCompressorInputStream =
    +          new GzipCompressorInputStream(byteArrayOutputStream);
    +      byte[] buffer = new byte[1024];
    +      int len;
    +      /**
    +       * Reads the next byte of the data from the input stream and stores them into buffer
    +       * Data is then read from the buffer and put into byteOutputStream from a offset.
    +       */
    +      while ((len = gzipCompressorInputStream.read(buffer)) != -1) {
    +        byteOutputStream.write(buffer, 0, len);
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Decompression step " + e.getMessage());
    +    }
    +    return byteOutputStream.toByteArray();
    +  }
    +
     +  /** Compresses the whole input array; delegates to {@code compressData}. */
     +  @Override public byte[] compressByte(byte[] unCompInput) {
     +    return compressData(unCompInput);
     +  }
     +
     +  /**
     +   * Compresses the whole input array.
     +   * NOTE(review): {@code byteSize} is ignored — gzip streams the data so no
     +   * size hint is needed; confirm callers expect this.
     +   */
     +  @Override public byte[] compressByte(byte[] unCompInput, int byteSize) {
     +    return compressData(unCompInput);
     +  }
     +
     +  /** Decompresses the whole input array; delegates to {@code decompressData}. */
     +  @Override public byte[] unCompressByte(byte[] compInput) {
     +    return decompressData(compInput, 0, compInput.length);
     +  }
     +
     +  /** Decompresses the sub-range {@code [offset, offset + length)} of the input array. */
     +  @Override public byte[] unCompressByte(byte[] compInput, int offset, int length) {
     +    return decompressData(compInput, offset, length);
     +  }
    +
    +  @Override public long rawUncompress(byte[] input, byte[] output) {
    +    //gzip api doesnt have rawUncompress yet.
    +    throw new RuntimeException("Not implemented rawUcompress for gzip yet");
    +  }
    +
    +  @Override public long maxCompressedLength(long inputSize) {
    +    if (inputSize < Integer.MAX_VALUE) {
    --- End diff --
   
    Done.


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240130514
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,138 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +/**
    + * Codec Class for performing Gzip Compression
    + */
    +public class GzipCompressor extends AbstractCompressor {
    +
     +  /** Default constructor; the compressor keeps no per-instance state. */
     +  public GzipCompressor() {
     +  }
     +
     +  /** @return the codec name ("gzip") used to identify this compressor */
     +  @Override public String getName() {
     +    return "gzip";
     +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Compresses in gzip format
    +   *
    +   * @param data Data Byte Array passed for compression
    +   * @return Compressed Byte Array
    +   */
    +  private byte[] compressData(byte[] data) {
    +    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorOutputStream gzipCompressorOutputStream =
    +          new GzipCompressorOutputStream(byteArrayOutputStream);
    +      try {
    +        /**
    +         * Below api will write bytes from specified byte array to the gzipCompressorOutputStream
    +         * The output stream will compress the given byte array.
    +         */
    +        gzipCompressorOutputStream.write(data);
    +      } catch (IOException e) {
    +        throw new RuntimeException("Error during Compression step " + e.getMessage());
    +      } finally {
    +        gzipCompressorOutputStream.close();
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Compression step " + e.getMessage());
    +    }
    +    return byteArrayOutputStream.toByteArray();
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Deompresses in gzip format
    +   *
    +   * @param data   Data Byte Array for Compression
    +   * @param offset Start value of Data Byte Array
    +   * @param length Size of Byte Array
    +   * @return
    +   */
    +  private byte[] decompressData(byte[] data, int offset, int length) {
    +    ByteArrayInputStream byteArrayOutputStream = new ByteArrayInputStream(data, offset, length);
    +    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorInputStream gzipCompressorInputStream =
    +          new GzipCompressorInputStream(byteArrayOutputStream);
    +      byte[] buffer = new byte[1024];
    +      int len;
    +      /**
    +       * Reads the next byte of the data from the input stream and stores them into buffer
    +       * Data is then read from the buffer and put into byteOutputStream from a offset.
    +       */
    +      while ((len = gzipCompressorInputStream.read(buffer)) != -1) {
    +        byteOutputStream.write(buffer, 0, len);
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Decompression step " + e.getMessage());
    +    }
    +    return byteOutputStream.toByteArray();
    +  }
    +
     +  /** Compresses the whole input array; delegates to {@code compressData}. */
     +  @Override public byte[] compressByte(byte[] unCompInput) {
     +    return compressData(unCompInput);
     +  }
     +
     +  /**
     +   * Compresses the whole input array.
     +   * NOTE(review): {@code byteSize} is ignored — gzip streams the data so no
     +   * size hint is needed; confirm callers expect this.
     +   */
     +  @Override public byte[] compressByte(byte[] unCompInput, int byteSize) {
     +    return compressData(unCompInput);
     +  }
     +
     +  /** Decompresses the whole input array; delegates to {@code decompressData}. */
     +  @Override public byte[] unCompressByte(byte[] compInput) {
     +    return decompressData(compInput, 0, compInput.length);
     +  }
     +
     +  /** Decompresses the sub-range {@code [offset, offset + length)} of the input array. */
     +  @Override public byte[] unCompressByte(byte[] compInput, int offset, int length) {
     +    return decompressData(compInput, offset, length);
     +  }
    +
    +  @Override public long rawUncompress(byte[] input, byte[] output) {
    +    //gzip api doesnt have rawUncompress yet.
    +    throw new RuntimeException("Not implemented rawUcompress for gzip yet");
    +  }
    +
    +  @Override public long maxCompressedLength(long inputSize) {
    +    if (inputSize < Integer.MAX_VALUE) {
    +      return inputSize;
    +    } else {
    +      throw new RuntimeException("compress input oversize for gzip");
    +    }
    +  }
    +
    +  @Override public boolean supportUnsafe() {
    --- End diff --
   
    Done.


---
1234