[GitHub] carbondata pull request #2847: [WIP]Support Gzip as column compressor

classic Classic list List threaded Threaded
64 messages Options
1234
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1634/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1845/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Failed  with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9894/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1641/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9901/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1852/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r238971644
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,203 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +import java.nio.DoubleBuffer;
    +import java.nio.FloatBuffer;
    +import java.nio.IntBuffer;
    +import java.nio.LongBuffer;
    +import java.nio.ShortBuffer;
    +
    +import org.apache.carbondata.core.util.ByteUtil;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +public class GzipCompressor implements Compressor {
    +
    +  public GzipCompressor() {
    +  }
    +
    +  @Override public String getName() {
    +    return "gzip";
    +  }
    +
    +  /*
    +   * Method called for compressing the data and
    +   * return a byte array
    +   */
    +  private byte[] compressData(byte[] data) {
    +
    +    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorOutputStream gzipCompressorOutputStream =
    +          new GzipCompressorOutputStream(byteArrayOutputStream);
    +      try {
    +        gzipCompressorOutputStream.write(data);
    +      } catch (IOException e) {
    +        throw new RuntimeException("Error during Compression step " + e.getMessage());
    +      } finally {
    +        gzipCompressorOutputStream.close();
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Compression step " + e.getMessage());
    +    }
    +
    +    return byteArrayOutputStream.toByteArray();
    +  }
    +
    +  /*
    +   * Method called for decompressing the data and
    +   * return a byte array
    +   */
    +  private byte[] decompressData(byte[] data) {
    +
    +    ByteArrayInputStream byteArrayOutputStream = new ByteArrayInputStream(data);
    +    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
    +
    --- End diff --
   
    done!



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r238971669
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,203 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +import java.nio.DoubleBuffer;
    +import java.nio.FloatBuffer;
    +import java.nio.IntBuffer;
    +import java.nio.LongBuffer;
    +import java.nio.ShortBuffer;
    +
    +import org.apache.carbondata.core.util.ByteUtil;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +public class GzipCompressor implements Compressor {
    +
    +  public GzipCompressor() {
    +  }
    +
    +  @Override public String getName() {
    +    return "gzip";
    +  }
    +
    +  /*
    +   * Method called for compressing the data and
    --- End diff --
   
    done!



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1645/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9905/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata issue #2847: [WIP]Support Gzip as column compressor

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:

    https://github.com/apache/carbondata/pull/2847
 
    Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1856/



---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240097367
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,138 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +/**
    + * Codec Class for performing Gzip Compression
    + */
    +public class GzipCompressor extends AbstractCompressor {
    +
    +  public GzipCompressor() {
    --- End diff --
   
    why this empty constructor is required ??


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240098067
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/CompressorFactory.java ---
    @@ -35,8 +35,8 @@
       private final Map<String, Compressor> allSupportedCompressors = new HashMap<>();
     
       public enum NativeSupportedCompressor {
    -    SNAPPY("snappy", SnappyCompressor.class),
    -    ZSTD("zstd", ZstdCompressor.class);
    +    SNAPPY("snappy", SnappyCompressor.class), ZSTD("zstd", ZstdCompressor.class), GZIP("gzip",
    --- End diff --
   
    Move each compressor to new line


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240098824
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,138 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +/**
    + * Codec Class for performing Gzip Compression
    + */
    +public class GzipCompressor extends AbstractCompressor {
    +
    +  public GzipCompressor() {
    +  }
    +
    +  @Override public String getName() {
    +    return "gzip";
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Compresses in gzip format
    +   *
    +   * @param data Data Byte Array passed for compression
    +   * @return Compressed Byte Array
    +   */
    +  private byte[] compressData(byte[] data) {
    +    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorOutputStream gzipCompressorOutputStream =
    +          new GzipCompressorOutputStream(byteArrayOutputStream);
    +      try {
    +        /**
    +         * Below api will write bytes from specified byte array to the gzipCompressorOutputStream
    +         * The output stream will compress the given byte array.
    +         */
    +        gzipCompressorOutputStream.write(data);
    +      } catch (IOException e) {
    +        throw new RuntimeException("Error during Compression step " + e.getMessage());
    +      } finally {
    +        gzipCompressorOutputStream.close();
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Compression step " + e.getMessage());
    +    }
    +    return byteArrayOutputStream.toByteArray();
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Deompresses in gzip format
    +   *
    +   * @param data   Data Byte Array for Compression
    +   * @param offset Start value of Data Byte Array
    +   * @param length Size of Byte Array
    +   * @return
    +   */
    +  private byte[] decompressData(byte[] data, int offset, int length) {
    +    ByteArrayInputStream byteArrayOutputStream = new ByteArrayInputStream(data, offset, length);
    +    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorInputStream gzipCompressorInputStream =
    +          new GzipCompressorInputStream(byteArrayOutputStream);
    +      byte[] buffer = new byte[1024];
    +      int len;
    +      /**
    +       * Reads the next byte of the data from the input stream and stores them into buffer
    +       * Data is then read from the buffer and put into byteOutputStream from a offset.
    +       */
    +      while ((len = gzipCompressorInputStream.read(buffer)) != -1) {
    +        byteOutputStream.write(buffer, 0, len);
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Decompression step " + e.getMessage());
    +    }
    +    return byteOutputStream.toByteArray();
    +  }
    +
    +  @Override public byte[] compressByte(byte[] unCompInput) {
    +    return compressData(unCompInput);
    +  }
    +
    +  @Override public byte[] compressByte(byte[] unCompInput, int byteSize) {
    +    return compressData(unCompInput);
    +  }
    +
    +  @Override public byte[] unCompressByte(byte[] compInput) {
    +    return decompressData(compInput, 0, compInput.length);
    +  }
    +
    +  @Override public byte[] unCompressByte(byte[] compInput, int offset, int length) {
    +    return decompressData(compInput, offset, length);
    +  }
    +
    +  @Override public long rawUncompress(byte[] input, byte[] output) {
    +    //gzip api doesnt have rawUncompress yet.
    +    throw new RuntimeException("Not implemented rawUcompress for gzip yet");
    +  }
    +
    +  @Override public long maxCompressedLength(long inputSize) {
    +    if (inputSize < Integer.MAX_VALUE) {
    +      return inputSize;
    +    } else {
    +      throw new RuntimeException("compress input oversize for gzip");
    +    }
    +  }
    +
    +  @Override public boolean supportUnsafe() {
    --- End diff --
   
    Please move this default implementation to AbstractCompressor and override it only in the SnappyCompressor class; remove this implementation from the other classes


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240100410
 
    --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataWithCompression.scala ---
    @@ -168,6 +168,7 @@ class TestLoadDataWithCompression extends QueryTest with BeforeAndAfterEach with
       private val tableName = "load_test_with_compressor"
       private var executorService: ExecutorService = _
       private val csvDataDir = s"$integrationPath/spark2/target/csv_load_compression"
    +  private val compressors = Array("snappy","zstd","gzip")
    --- End diff --
   
    Please don't remove any test case add new testcase for Zstd


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240100527
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,138 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +/**
    + * Codec Class for performing Gzip Compression
    + */
    +public class GzipCompressor extends AbstractCompressor {
    +
    +  public GzipCompressor() {
    +  }
    +
    +  @Override public String getName() {
    +    return "gzip";
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Compresses in gzip format
    +   *
    +   * @param data Data Byte Array passed for compression
    +   * @return Compressed Byte Array
    +   */
    +  private byte[] compressData(byte[] data) {
    +    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorOutputStream gzipCompressorOutputStream =
    +          new GzipCompressorOutputStream(byteArrayOutputStream);
    +      try {
    +        /**
    +         * Below api will write bytes from specified byte array to the gzipCompressorOutputStream
    +         * The output stream will compress the given byte array.
    +         */
    +        gzipCompressorOutputStream.write(data);
    +      } catch (IOException e) {
    +        throw new RuntimeException("Error during Compression step " + e.getMessage());
    +      } finally {
    +        gzipCompressorOutputStream.close();
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Compression step " + e.getMessage());
    +    }
    +    return byteArrayOutputStream.toByteArray();
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Deompresses in gzip format
    +   *
    +   * @param data   Data Byte Array for Compression
    +   * @param offset Start value of Data Byte Array
    +   * @param length Size of Byte Array
    +   * @return
    +   */
    +  private byte[] decompressData(byte[] data, int offset, int length) {
    +    ByteArrayInputStream byteArrayOutputStream = new ByteArrayInputStream(data, offset, length);
    +    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorInputStream gzipCompressorInputStream =
    +          new GzipCompressorInputStream(byteArrayOutputStream);
    +      byte[] buffer = new byte[1024];
    +      int len;
    +      /**
    +       * Reads the next byte of the data from the input stream and stores them into buffer
    +       * Data is then read from the buffer and put into byteOutputStream from a offset.
    +       */
    +      while ((len = gzipCompressorInputStream.read(buffer)) != -1) {
    +        byteOutputStream.write(buffer, 0, len);
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Decompression step " + e.getMessage());
    +    }
    +    return byteOutputStream.toByteArray();
    +  }
    +
    +  @Override public byte[] compressByte(byte[] unCompInput) {
    +    return compressData(unCompInput);
    +  }
    +
    +  @Override public byte[] compressByte(byte[] unCompInput, int byteSize) {
    +    return compressData(unCompInput);
    +  }
    +
    +  @Override public byte[] unCompressByte(byte[] compInput) {
    +    return decompressData(compInput, 0, compInput.length);
    +  }
    +
    +  @Override public byte[] unCompressByte(byte[] compInput, int offset, int length) {
    +    return decompressData(compInput, offset, length);
    +  }
    +
    +  @Override public long rawUncompress(byte[] input, byte[] output) {
    +    //gzip api doesnt have rawUncompress yet.
    +    throw new RuntimeException("Not implemented rawUcompress for gzip yet");
    +  }
    +
    +  @Override public long maxCompressedLength(long inputSize) {
    +    if (inputSize < Integer.MAX_VALUE) {
    --- End diff --
   
    Please add some comments for this piece of code


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240102212
 
    --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataWithCompression.scala ---
    @@ -168,6 +168,7 @@ class TestLoadDataWithCompression extends QueryTest with BeforeAndAfterEach with
       private val tableName = "load_test_with_compressor"
       private var executorService: ExecutorService = _
       private val csvDataDir = s"$integrationPath/spark2/target/csv_load_compression"
    +  private val compressors = Array("snappy","zstd","gzip")
    --- End diff --
   
    No test cases were removed. Only the test case name was changed from "test with snappy and offheap" to "test different compressors and offheap".


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240130373
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,138 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +/**
    + * Codec Class for performing Gzip Compression
    + */
    +public class GzipCompressor extends AbstractCompressor {
    +
    +  public GzipCompressor() {
    --- End diff --
   
    Removed.


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240130469
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,138 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +/**
    + * Codec Class for performing Gzip Compression
    + */
    +public class GzipCompressor extends AbstractCompressor {
    +
    +  public GzipCompressor() {
    +  }
    +
    +  @Override public String getName() {
    +    return "gzip";
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Compresses in gzip format
    +   *
    +   * @param data Data Byte Array passed for compression
    +   * @return Compressed Byte Array
    +   */
    +  private byte[] compressData(byte[] data) {
    +    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorOutputStream gzipCompressorOutputStream =
    +          new GzipCompressorOutputStream(byteArrayOutputStream);
    +      try {
    +        /**
    +         * Below api will write bytes from specified byte array to the gzipCompressorOutputStream
    +         * The output stream will compress the given byte array.
    +         */
    +        gzipCompressorOutputStream.write(data);
    +      } catch (IOException e) {
    +        throw new RuntimeException("Error during Compression step " + e.getMessage());
    +      } finally {
    +        gzipCompressorOutputStream.close();
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Compression step " + e.getMessage());
    +    }
    +    return byteArrayOutputStream.toByteArray();
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Deompresses in gzip format
    +   *
    +   * @param data   Data Byte Array for Compression
    +   * @param offset Start value of Data Byte Array
    +   * @param length Size of Byte Array
    +   * @return
    +   */
    +  private byte[] decompressData(byte[] data, int offset, int length) {
    +    ByteArrayInputStream byteArrayOutputStream = new ByteArrayInputStream(data, offset, length);
    +    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorInputStream gzipCompressorInputStream =
    +          new GzipCompressorInputStream(byteArrayOutputStream);
    +      byte[] buffer = new byte[1024];
    +      int len;
    +      /**
    +       * Reads the next byte of the data from the input stream and stores them into buffer
    +       * Data is then read from the buffer and put into byteOutputStream from a offset.
    +       */
    +      while ((len = gzipCompressorInputStream.read(buffer)) != -1) {
    +        byteOutputStream.write(buffer, 0, len);
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Decompression step " + e.getMessage());
    +    }
    +    return byteOutputStream.toByteArray();
    +  }
    +
     +  /** Compresses the whole input array; delegates to {@code compressData}. */
     +  @Override public byte[] compressByte(byte[] unCompInput) {
     +    return compressData(unCompInput);
     +  }
     +
     +  /**
     +   * Compresses the whole input array.
     +   * NOTE(review): {@code byteSize} is ignored — gzip streams the data so no
     +   * size hint is needed; confirm callers expect this.
     +   */
     +  @Override public byte[] compressByte(byte[] unCompInput, int byteSize) {
     +    return compressData(unCompInput);
     +  }
     +
     +  /** Decompresses the whole input array; delegates to {@code decompressData}. */
     +  @Override public byte[] unCompressByte(byte[] compInput) {
     +    return decompressData(compInput, 0, compInput.length);
     +  }
     +
     +  /** Decompresses the sub-range {@code [offset, offset + length)} of the input array. */
     +  @Override public byte[] unCompressByte(byte[] compInput, int offset, int length) {
     +    return decompressData(compInput, offset, length);
     +  }
    +
    +  @Override public long rawUncompress(byte[] input, byte[] output) {
    +    //gzip api doesnt have rawUncompress yet.
    +    throw new RuntimeException("Not implemented rawUcompress for gzip yet");
    +  }
    +
    +  @Override public long maxCompressedLength(long inputSize) {
    +    if (inputSize < Integer.MAX_VALUE) {
    --- End diff --
   
    Done.


---
Reply | Threaded
Open this post in threaded view
|

[GitHub] carbondata pull request #2847: [CARBONDATA-3005]Support Gzip as column compr...

qiuchenjian-2
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2847#discussion_r240130514
 
    --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java ---
    @@ -0,0 +1,138 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.carbondata.core.datastore.compression;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.IOException;
    +
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
    +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    +
    +/**
    + * Codec Class for performing Gzip Compression
    + */
    +public class GzipCompressor extends AbstractCompressor {
    +
     +  /** Default constructor; the compressor keeps no per-instance state. */
     +  public GzipCompressor() {
     +  }
     +
     +  /** @return the codec name ("gzip") used to identify this compressor */
     +  @Override public String getName() {
     +    return "gzip";
     +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Compresses in gzip format
    +   *
    +   * @param data Data Byte Array passed for compression
    +   * @return Compressed Byte Array
    +   */
    +  private byte[] compressData(byte[] data) {
    +    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorOutputStream gzipCompressorOutputStream =
    +          new GzipCompressorOutputStream(byteArrayOutputStream);
    +      try {
    +        /**
    +         * Below api will write bytes from specified byte array to the gzipCompressorOutputStream
    +         * The output stream will compress the given byte array.
    +         */
    +        gzipCompressorOutputStream.write(data);
    +      } catch (IOException e) {
    +        throw new RuntimeException("Error during Compression step " + e.getMessage());
    +      } finally {
    +        gzipCompressorOutputStream.close();
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Compression step " + e.getMessage());
    +    }
    +    return byteArrayOutputStream.toByteArray();
    +  }
    +
    +  /**
    +   * This method takes the Byte Array data and Deompresses in gzip format
    +   *
    +   * @param data   Data Byte Array for Compression
    +   * @param offset Start value of Data Byte Array
    +   * @param length Size of Byte Array
    +   * @return
    +   */
    +  private byte[] decompressData(byte[] data, int offset, int length) {
    +    ByteArrayInputStream byteArrayOutputStream = new ByteArrayInputStream(data, offset, length);
    +    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
    +    try {
    +      GzipCompressorInputStream gzipCompressorInputStream =
    +          new GzipCompressorInputStream(byteArrayOutputStream);
    +      byte[] buffer = new byte[1024];
    +      int len;
    +      /**
    +       * Reads the next byte of the data from the input stream and stores them into buffer
    +       * Data is then read from the buffer and put into byteOutputStream from a offset.
    +       */
    +      while ((len = gzipCompressorInputStream.read(buffer)) != -1) {
    +        byteOutputStream.write(buffer, 0, len);
    +      }
    +    } catch (IOException e) {
    +      throw new RuntimeException("Error during Decompression step " + e.getMessage());
    +    }
    +    return byteOutputStream.toByteArray();
    +  }
    +
     +  /** Compresses the whole input array; delegates to {@code compressData}. */
     +  @Override public byte[] compressByte(byte[] unCompInput) {
     +    return compressData(unCompInput);
     +  }
     +
     +  /**
     +   * Compresses the whole input array.
     +   * NOTE(review): {@code byteSize} is ignored — gzip streams the data so no
     +   * size hint is needed; confirm callers expect this.
     +   */
     +  @Override public byte[] compressByte(byte[] unCompInput, int byteSize) {
     +    return compressData(unCompInput);
     +  }
     +
     +  /** Decompresses the whole input array; delegates to {@code decompressData}. */
     +  @Override public byte[] unCompressByte(byte[] compInput) {
     +    return decompressData(compInput, 0, compInput.length);
     +  }
     +
     +  /** Decompresses the sub-range {@code [offset, offset + length)} of the input array. */
     +  @Override public byte[] unCompressByte(byte[] compInput, int offset, int length) {
     +    return decompressData(compInput, offset, length);
     +  }
    +
    +  @Override public long rawUncompress(byte[] input, byte[] output) {
    +    //gzip api doesnt have rawUncompress yet.
    +    throw new RuntimeException("Not implemented rawUcompress for gzip yet");
    +  }
    +
    +  @Override public long maxCompressedLength(long inputSize) {
    +    if (inputSize < Integer.MAX_VALUE) {
    +      return inputSize;
    +    } else {
    +      throw new RuntimeException("compress input oversize for gzip");
    +    }
    +  }
    +
    +  @Override public boolean supportUnsafe() {
    --- End diff --
   
    Done.


---
1234