GitHub user shardul-cr7 opened a pull request:
https://github.com/apache/carbondata/pull/2847 [WIP]Support Gzip as column compressor Gzip compressed file size is less than that of snappy but takes more time. Data generated by tpch-dbgen(lineitem) **Load Performance Comparisons (Compression)** *Test Case 1* *File Size 3.9G* *Records ~30M* | Codec Used | Load Time | File Size After Load | | ------ | ------ | ------ | | Snappy | 156s | 101M | Zstd| 153s | 2.2M | Gzip| 163s | 12.1M *Test Case 2* *File Size 7.8G* *Records ~60M* | Codec Used | Load Time | File Size After Load | | ------ | ------ | ------ | | Snappy | 336s | 203.6M | Zstd| 352s | 4.3M | Gzip| 354s | 12.1M **Query Performance (Decompression)** *Test Case 1* | Codec Used | Full Scan Time | ------ | ------ | Snappy | 16.108s | Zstd| 14.595s | Gzip| 14.313s *Test Case 2* | Codec Used | Full Scan Time | ------ | ------ | Snappy | 23.559s | Zstd| 23.913s | Gzip| 26.741s Be sure to do all of the following checklist to help us incorporate your contribution quickly and easily: - [ ] Any interfaces changed? - [ ] Any backward compatibility impacted? - [ ] Document update required? - [x] Testing done added some testcases - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. You can merge this pull request into a Git repository by running: $ git pull https://github.com/shardul-cr7/carbondata b010 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/2847.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #2847 ---- commit 6ad88ccc5663353d16372d91878d7efb223b16d6 Author: shardul-cr7 <shardulsingh22@...> Date: 2018-10-23T11:57:47Z [WIP]Support Gzip ---- --- |
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2847 Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9235/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2847 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/974/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2847 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1187/ --- |
In reply to this post by qiuchenjian-2
Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2847#discussion_r227615128 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java --- @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.compression; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.DoubleBuffer; +import java.nio.FloatBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; + +import org.apache.carbondata.core.util.ByteUtil; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; + +public class GzipCompressor implements Compressor { + + public GzipCompressor() { + } + + @Override public String getName() { + return "gzip"; + } + + /* + * Method called for compressing the data and + * return a byte array + */ + private byte[] compressData(byte[] data) { + + ByteArrayOutputStream bt = new ByteArrayOutputStream(); + try { + GzipCompressorOutputStream gzos = new GzipCompressorOutputStream(bt); + try { + 
gzos.write(data); + } catch (IOException e) { + e.printStackTrace(); + } finally { + gzos.close(); + } + } catch (IOException e) { + e.printStackTrace(); + } + + return bt.toByteArray(); + } + + /* + * Method called for decompressing the data and + * return a byte array + */ + private byte[] decompressData(byte[] data) { + + ByteArrayInputStream bt = new ByteArrayInputStream(data); + ByteArrayOutputStream bot = new ByteArrayOutputStream(); + + try { + GzipCompressorInputStream gzis = new GzipCompressorInputStream(bt); + byte[] buffer = new byte[1024]; + int len; + + while ((len = gzis.read(buffer)) != -1) { + bot.write(buffer, 0, len); + } + + } catch (IOException e) { + e.printStackTrace(); + } + + return bot.toByteArray(); + } + + @Override public byte[] compressByte(byte[] unCompInput) { + return compressData(unCompInput); + } + + @Override public byte[] compressByte(byte[] unCompInput, int byteSize) { + return compressData(unCompInput); + } + + @Override public byte[] unCompressByte(byte[] compInput) { + return decompressData(compInput); + } + + @Override public byte[] unCompressByte(byte[] compInput, int offset, int length) { + byte[] data = new byte[length]; + System.arraycopy(compInput, offset, data, 0, length); + return decompressData(data); + } + + @Override public byte[] compressShort(short[] unCompInput) { + ByteBuffer unCompBuffer = ByteBuffer.allocate(unCompInput.length * ByteUtil.SIZEOF_SHORT); + unCompBuffer.asShortBuffer().put(unCompInput); + return compressData(unCompBuffer.array()); + } + + @Override public short[] unCompressShort(byte[] compInput, int offset, int length) { + byte[] unCompArray = unCompressByte(compInput, offset, length); + ShortBuffer unCompBuffer = ByteBuffer.wrap(unCompArray).asShortBuffer(); + short[] shorts = new short[unCompArray.length / ByteUtil.SIZEOF_SHORT]; + unCompBuffer.get(shorts); + return shorts; + } + + @Override public byte[] compressInt(int[] unCompInput) { + ByteBuffer unCompBuffer = 
ByteBuffer.allocate(unCompInput.length * ByteUtil.SIZEOF_INT); + unCompBuffer.asIntBuffer().put(unCompInput); + return compressData(unCompBuffer.array()); + } + + @Override public int[] unCompressInt(byte[] compInput, int offset, int length) { + byte[] unCompArray = unCompressByte(compInput, offset, length); + IntBuffer unCompBuffer = ByteBuffer.wrap(unCompArray).asIntBuffer(); + int[] ints = new int[unCompArray.length / ByteUtil.SIZEOF_INT]; + unCompBuffer.get(ints); + return ints; + } + + @Override public byte[] compressLong(long[] unCompInput) { + ByteBuffer unCompBuffer = ByteBuffer.allocate(unCompInput.length * ByteUtil.SIZEOF_LONG); + unCompBuffer.asLongBuffer().put(unCompInput); + return compressData(unCompBuffer.array()); + } + + @Override public long[] unCompressLong(byte[] compInput, int offset, int length) { + byte[] unCompArray = unCompressByte(compInput, offset, length); + LongBuffer unCompBuffer = ByteBuffer.wrap(unCompArray).asLongBuffer(); + long[] longs = new long[unCompArray.length / ByteUtil.SIZEOF_LONG]; + unCompBuffer.get(longs); + return longs; + } + + @Override public byte[] compressFloat(float[] unCompInput) { + ByteBuffer unCompBuffer = ByteBuffer.allocate(unCompInput.length * ByteUtil.SIZEOF_FLOAT); + unCompBuffer.asFloatBuffer().put(unCompInput); + return compressData(unCompBuffer.array()); + } + + @Override public float[] unCompressFloat(byte[] compInput, int offset, int length) { + byte[] unCompArray = unCompressByte(compInput, offset, length); + FloatBuffer unCompBuffer = ByteBuffer.wrap(unCompArray).asFloatBuffer(); + float[] floats = new float[unCompArray.length / ByteUtil.SIZEOF_FLOAT]; + unCompBuffer.get(floats); + return floats; + } + + @Override public byte[] compressDouble(double[] unCompInput) { + ByteBuffer unCompBuffer = ByteBuffer.allocate(unCompInput.length * ByteUtil.SIZEOF_DOUBLE); + unCompBuffer.asDoubleBuffer().put(unCompInput); + return compressData(unCompBuffer.array()); + } + + @Override public double[] 
unCompressDouble(byte[] compInput, int offset, int length) { + byte[] unCompArray = unCompressByte(compInput, offset, length); + DoubleBuffer unCompBuffer = ByteBuffer.wrap(unCompArray).asDoubleBuffer(); + double[] doubles = new double[unCompArray.length / ByteUtil.SIZEOF_DOUBLE]; + unCompBuffer.get(doubles); + return doubles; + } + + @Override public long rawCompress(long inputAddress, int inputSize, long outputAddress) + throws IOException { + throw new RuntimeException("Not implemented rawUncompress for gzip yet"); + } + + @Override public long rawUncompress(byte[] input, byte[] output) throws IOException { + //gzip api doesnt have rawCompress yet. --- End diff -- If that is the case, just throw an exception; otherwise the JVM may crash if an illegal address/length is passed. --- |
In reply to this post by qiuchenjian-2
Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2847#discussion_r227615513 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java --- @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.compression; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.DoubleBuffer; +import java.nio.FloatBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; + +import org.apache.carbondata.core.util.ByteUtil; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; + +public class GzipCompressor implements Compressor { + + public GzipCompressor() { + } + + @Override public String getName() { + return "gzip"; + } + + /* + * Method called for compressing the data and + * return a byte array + */ + private byte[] compressData(byte[] data) { + + ByteArrayOutputStream bt = new ByteArrayOutputStream(); + try { + GzipCompressorOutputStream gzos = new GzipCompressorOutputStream(bt); + try { + 
gzos.write(data); + } catch (IOException e) { + e.printStackTrace(); + } finally { + gzos.close(); + } + } catch (IOException e) { + e.printStackTrace(); --- End diff -- please optimize the logging! --- |
In reply to this post by qiuchenjian-2
Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2847#discussion_r227615581 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java --- @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.compression; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.DoubleBuffer; +import java.nio.FloatBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; + +import org.apache.carbondata.core.util.ByteUtil; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; + +public class GzipCompressor implements Compressor { + + public GzipCompressor() { + } + + @Override public String getName() { + return "gzip"; + } + + /* + * Method called for compressing the data and + * return a byte array + */ + private byte[] compressData(byte[] data) { + + ByteArrayOutputStream bt = new ByteArrayOutputStream(); + try { + GzipCompressorOutputStream gzos = new GzipCompressorOutputStream(bt); + try { + 
gzos.write(data); + } catch (IOException e) { + e.printStackTrace(); + } finally { + gzos.close(); + } + } catch (IOException e) { + e.printStackTrace(); + } + + return bt.toByteArray(); + } + + /* + * Method called for decompressing the data and + * return a byte array + */ + private byte[] decompressData(byte[] data) { + + ByteArrayInputStream bt = new ByteArrayInputStream(data); + ByteArrayOutputStream bot = new ByteArrayOutputStream(); + + try { + GzipCompressorInputStream gzis = new GzipCompressorInputStream(bt); + byte[] buffer = new byte[1024]; + int len; + + while ((len = gzis.read(buffer)) != -1) { + bot.write(buffer, 0, len); + } + + } catch (IOException e) { + e.printStackTrace(); + } + + return bot.toByteArray(); --- End diff -- `bot` not closed --- |
In reply to this post by qiuchenjian-2
Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2847#discussion_r227615561 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java --- @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.compression; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.DoubleBuffer; +import java.nio.FloatBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; + +import org.apache.carbondata.core.util.ByteUtil; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; + +public class GzipCompressor implements Compressor { + + public GzipCompressor() { + } + + @Override public String getName() { + return "gzip"; + } + + /* + * Method called for compressing the data and + * return a byte array + */ + private byte[] compressData(byte[] data) { + + ByteArrayOutputStream bt = new ByteArrayOutputStream(); + try { + GzipCompressorOutputStream gzos = new GzipCompressorOutputStream(bt); + try { + 
gzos.write(data); + } catch (IOException e) { + e.printStackTrace(); + } finally { + gzos.close(); + } + } catch (IOException e) { + e.printStackTrace(); + } + + return bt.toByteArray(); + } + + /* + * Method called for decompressing the data and + * return a byte array + */ + private byte[] decompressData(byte[] data) { + + ByteArrayInputStream bt = new ByteArrayInputStream(data); + ByteArrayOutputStream bot = new ByteArrayOutputStream(); + + try { + GzipCompressorInputStream gzis = new GzipCompressorInputStream(bt); + byte[] buffer = new byte[1024]; + int len; + + while ((len = gzis.read(buffer)) != -1) { + bot.write(buffer, 0, len); + } + + } catch (IOException e) { + e.printStackTrace(); --- End diff -- please optimize the logging! --- |
In reply to this post by qiuchenjian-2
Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2847#discussion_r227615489 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java --- @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.compression; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.DoubleBuffer; +import java.nio.FloatBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; + +import org.apache.carbondata.core.util.ByteUtil; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; + +public class GzipCompressor implements Compressor { + + public GzipCompressor() { + } + + @Override public String getName() { + return "gzip"; + } + + /* + * Method called for compressing the data and + * return a byte array + */ + private byte[] compressData(byte[] data) { + + ByteArrayOutputStream bt = new ByteArrayOutputStream(); + try { + GzipCompressorOutputStream gzos = new GzipCompressorOutputStream(bt); + try { + 
gzos.write(data); + } catch (IOException e) { + e.printStackTrace(); --- End diff -- please optimize the logging! --- |
In reply to this post by qiuchenjian-2
Github user xuchuanyin commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2847#discussion_r227615426 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java --- @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.compression; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.DoubleBuffer; +import java.nio.FloatBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; + +import org.apache.carbondata.core.util.ByteUtil; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; + +public class GzipCompressor implements Compressor { + + public GzipCompressor() { + } + + @Override public String getName() { + return "gzip"; + } + + /* + * Method called for compressing the data and + * return a byte array + */ + private byte[] compressData(byte[] data) { + + ByteArrayOutputStream bt = new ByteArrayOutputStream(); + try { + GzipCompressorOutputStream gzos = new GzipCompressorOutputStream(bt); + try { + 
gzos.write(data); + } catch (IOException e) { + e.printStackTrace(); + } finally { + gzos.close(); + } + } catch (IOException e) { + e.printStackTrace(); + } + + return bt.toByteArray(); --- End diff -- why is `bt` still open? --- |
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2847#discussion_r227658842 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java --- @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.compression; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.DoubleBuffer; +import java.nio.FloatBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; + +import org.apache.carbondata.core.util.ByteUtil; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; + +public class GzipCompressor implements Compressor { + + public GzipCompressor() { + } + + @Override public String getName() { + return "gzip"; + } + + /* + * Method called for compressing the data and + * return a byte array + */ + private byte[] compressData(byte[] data) { + + ByteArrayOutputStream bt = new ByteArrayOutputStream(); + try { + GzipCompressorOutputStream gzos = new GzipCompressorOutputStream(bt); + try { + 
gzos.write(data); + } catch (IOException e) { + e.printStackTrace(); + } finally { + gzos.close(); + } + } catch (IOException e) { + e.printStackTrace(); + } + + return bt.toByteArray(); --- End diff -- ByteArrayOutputStream.close() does nothing. Its implementation in Java is like this: public void close() throws IOException { } I can close it, but then I'll have to copy the stream to a byte array and return that byte array, which can be a costly operation. --- |
In reply to this post by qiuchenjian-2
Github user shardul-cr7 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2847#discussion_r227658900 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java --- @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.compression; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.DoubleBuffer; +import java.nio.FloatBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; + +import org.apache.carbondata.core.util.ByteUtil; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; + +public class GzipCompressor implements Compressor { + + public GzipCompressor() { + } + + @Override public String getName() { + return "gzip"; + } + + /* + * Method called for compressing the data and + * return a byte array + */ + private byte[] compressData(byte[] data) { + + ByteArrayOutputStream bt = new ByteArrayOutputStream(); + try { + GzipCompressorOutputStream gzos = new GzipCompressorOutputStream(bt); + try { + 
gzos.write(data); + } catch (IOException e) { + e.printStackTrace(); --- End diff -- ok will do that! --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2847 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/994/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2847 Build Success with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1207/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2847 Build Success with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9260/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2847 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder2.1/1633/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2847 Build Failed with Spark 2.3.1, Please check CI http://136.243.101.176:8080/job/carbondataprbuilder2.3/9893/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/2847 Build Failed with Spark 2.2.1, Please check CI http://95.216.28.178:8080/job/ApacheCarbonPRBuilder1/1844/ --- |
In reply to this post by qiuchenjian-2
Github user akashrn5 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2847#discussion_r238657955 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java --- @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.compression; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.DoubleBuffer; +import java.nio.FloatBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; + +import org.apache.carbondata.core.util.ByteUtil; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; + +public class GzipCompressor implements Compressor { + + public GzipCompressor() { + } + + @Override public String getName() { + return "gzip"; + } + + /* + * Method called for compressing the data and + * return a byte array + */ + private byte[] compressData(byte[] data) { + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + try { + GzipCompressorOutputStream gzipCompressorOutputStream = + new 
GzipCompressorOutputStream(byteArrayOutputStream); + try { + gzipCompressorOutputStream.write(data); + } catch (IOException e) { + throw new RuntimeException("Error during Compression step " + e.getMessage()); + } finally { + gzipCompressorOutputStream.close(); + } + } catch (IOException e) { + throw new RuntimeException("Error during Compression step " + e.getMessage()); + } + + return byteArrayOutputStream.toByteArray(); + } + + /* + * Method called for decompressing the data and + * return a byte array + */ + private byte[] decompressData(byte[] data) { + + ByteArrayInputStream byteArrayOutputStream = new ByteArrayInputStream(data); + ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream(); + --- End diff -- remove empty line --- |
In reply to this post by qiuchenjian-2
Github user akashrn5 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2847#discussion_r238658593 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/compression/GzipCompressor.java --- @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.compression; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.DoubleBuffer; +import java.nio.FloatBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; + +import org.apache.carbondata.core.util.ByteUtil; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; + +public class GzipCompressor implements Compressor { + + public GzipCompressor() { + } + + @Override public String getName() { + return "gzip"; + } + + /* + * Method called for compressing the data and --- End diff -- change the comment to the standard doc format --- |
Free forum by Nabble | Edit this page |