Skip to content

Commit 1d4856f

Browse files
liyafan82 authored and emkornfield committed
ARROW-11899: [Java] Refactor the compression codec implementation into core/Arrow specific parts
This issue is in response to the discussion in https://github.com/apache/arrow/pull/8949/files#r588049088

We want to refactor the compression codec related code into two parts: one for the core compression logic, and the other for Arrow-specific logic. This will make it easier to support other compression types.

Closes apache#9769 from liyafan82/fly_0322_ref

Authored-by: liyafan82 <fan_li_ya@foxmail.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
1 parent b2fa55d commit 1d4856f

2 files changed

Lines changed: 127 additions & 74 deletions

File tree

java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java

Lines changed: 11 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,8 @@
2525

2626
import org.apache.arrow.memory.ArrowBuf;
2727
import org.apache.arrow.memory.BufferAllocator;
28-
import org.apache.arrow.memory.util.MemoryUtil;
2928
import org.apache.arrow.util.Preconditions;
30-
import org.apache.arrow.vector.compression.CompressionCodec;
29+
import org.apache.arrow.vector.compression.AbstractCompressionCodec;
3130
import org.apache.arrow.vector.compression.CompressionUtil;
3231
import org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorInputStream;
3332
import org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorOutputStream;
@@ -38,109 +37,47 @@
3837
/**
3938
* Compression codec for the LZ4 algorithm.
4039
*/
41-
public class Lz4CompressionCodec implements CompressionCodec {
40+
public class Lz4CompressionCodec extends AbstractCompressionCodec {
4241

4342
@Override
44-
public ArrowBuf compress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) {
43+
protected ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) {
4544
Preconditions.checkArgument(uncompressedBuffer.writerIndex() <= Integer.MAX_VALUE,
46-
"The uncompressed buffer size exceeds the integer limit");
45+
"The uncompressed buffer size exceeds the integer limit %s.", Integer.MAX_VALUE);
4746

48-
if (uncompressedBuffer.writerIndex() == 0L) {
49-
// shortcut for empty buffer
50-
ArrowBuf compressedBuffer = allocator.buffer(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH);
51-
compressedBuffer.setLong(0, 0);
52-
compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH);
53-
uncompressedBuffer.close();
54-
return compressedBuffer;
55-
}
56-
57-
try {
58-
ArrowBuf compressedBuffer = doCompress(allocator, uncompressedBuffer);
59-
long compressedLength = compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH;
60-
if (compressedLength > uncompressedBuffer.writerIndex()) {
61-
// compressed buffer is larger, send the raw buffer
62-
compressedBuffer.close();
63-
compressedBuffer = CompressionUtil.packageRawBuffer(allocator, uncompressedBuffer);
64-
}
65-
66-
uncompressedBuffer.close();
67-
return compressedBuffer;
68-
} catch (IOException e) {
69-
throw new RuntimeException(e);
70-
}
71-
}
72-
73-
private ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) throws IOException {
7447
byte[] inBytes = new byte[(int) uncompressedBuffer.writerIndex()];
7548
PlatformDependent.copyMemory(uncompressedBuffer.memoryAddress(), inBytes, 0, uncompressedBuffer.writerIndex());
7649
ByteArrayOutputStream baos = new ByteArrayOutputStream();
7750
try (InputStream in = new ByteArrayInputStream(inBytes);
7851
OutputStream out = new FramedLZ4CompressorOutputStream(baos)) {
7952
IOUtils.copy(in, out);
53+
} catch (IOException e) {
54+
throw new RuntimeException(e);
8055
}
8156

8257
byte[] outBytes = baos.toByteArray();
8358

8459
ArrowBuf compressedBuffer = allocator.buffer(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + outBytes.length);
85-
86-
long uncompressedLength = uncompressedBuffer.writerIndex();
87-
if (!MemoryUtil.LITTLE_ENDIAN) {
88-
uncompressedLength = Long.reverseBytes(uncompressedLength);
89-
}
90-
// first 8 bytes reserved for uncompressed length, according to the specification
91-
compressedBuffer.setLong(0, uncompressedLength);
92-
9360
PlatformDependent.copyMemory(
9461
outBytes, 0, compressedBuffer.memoryAddress() + CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, outBytes.length);
9562
compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + outBytes.length);
9663
return compressedBuffer;
9764
}
9865

9966
@Override
100-
public ArrowBuf decompress(BufferAllocator allocator, ArrowBuf compressedBuffer) {
67+
protected ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBuffer) {
10168
Preconditions.checkArgument(compressedBuffer.writerIndex() <= Integer.MAX_VALUE,
102-
"The compressed buffer size exceeds the integer limit");
103-
104-
Preconditions.checkArgument(compressedBuffer.writerIndex() >= CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH,
105-
"Not enough data to decompress.");
69+
"The compressed buffer size exceeds the integer limit %s", Integer.MAX_VALUE);
10670

107-
long decompressedLength = compressedBuffer.getLong(0);
108-
if (!MemoryUtil.LITTLE_ENDIAN) {
109-
decompressedLength = Long.reverseBytes(decompressedLength);
110-
}
111-
112-
if (decompressedLength == 0L) {
113-
// shortcut for empty buffer
114-
compressedBuffer.close();
115-
return allocator.getEmpty();
116-
}
117-
118-
if (decompressedLength == CompressionUtil.NO_COMPRESSION_LENGTH) {
119-
// no compression
120-
return CompressionUtil.extractUncompressedBuffer(compressedBuffer);
121-
}
122-
123-
try {
124-
ArrowBuf decompressedBuffer = doDecompress(allocator, compressedBuffer);
125-
compressedBuffer.close();
126-
return decompressedBuffer;
127-
} catch (IOException e) {
128-
throw new RuntimeException(e);
129-
}
130-
}
131-
132-
private ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBuffer) throws IOException {
133-
long decompressedLength = compressedBuffer.getLong(0);
134-
if (!MemoryUtil.LITTLE_ENDIAN) {
135-
decompressedLength = Long.reverseBytes(decompressedLength);
136-
}
71+
long decompressedLength = readUncompressedLength(compressedBuffer);
13772

13873
byte[] inBytes = new byte[(int) (compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH)];
13974
PlatformDependent.copyMemory(
14075
compressedBuffer.memoryAddress() + CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, inBytes, 0, inBytes.length);
14176
ByteArrayOutputStream out = new ByteArrayOutputStream((int) decompressedLength);
14277
try (InputStream in = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(inBytes))) {
14378
IOUtils.copy(in, out);
79+
} catch (IOException e) {
80+
throw new RuntimeException(e);
14481
}
14582

14683
byte[] outBytes = out.toByteArray();
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.arrow.vector.compression;
19+
20+
import org.apache.arrow.memory.ArrowBuf;
21+
import org.apache.arrow.memory.BufferAllocator;
22+
import org.apache.arrow.memory.util.MemoryUtil;
23+
import org.apache.arrow.util.Preconditions;
24+
25+
/**
26+
* The base class for concrete compression codecs, providing
27+
* common logic for all compression codecs.
28+
*/
29+
public abstract class AbstractCompressionCodec implements CompressionCodec {
30+
31+
@Override
32+
public ArrowBuf compress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) {
33+
if (uncompressedBuffer.writerIndex() == 0L) {
34+
// shortcut for empty buffer
35+
ArrowBuf compressedBuffer = allocator.buffer(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH);
36+
compressedBuffer.setLong(0, 0);
37+
compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH);
38+
uncompressedBuffer.close();
39+
return compressedBuffer;
40+
}
41+
42+
ArrowBuf compressedBuffer = doCompress(allocator, uncompressedBuffer);
43+
long compressedLength = compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH;
44+
long uncompressedLength = uncompressedBuffer.writerIndex();
45+
46+
if (compressedLength > uncompressedLength) {
47+
// compressed buffer is larger, send the raw buffer
48+
compressedBuffer.close();
49+
compressedBuffer = CompressionUtil.packageRawBuffer(allocator, uncompressedBuffer);
50+
} else {
51+
writeUncompressedLength(compressedBuffer, uncompressedLength);
52+
}
53+
54+
uncompressedBuffer.close();
55+
return compressedBuffer;
56+
}
57+
58+
@Override
59+
public ArrowBuf decompress(BufferAllocator allocator, ArrowBuf compressedBuffer) {
60+
Preconditions.checkArgument(compressedBuffer.writerIndex() >= CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH,
61+
"Not enough data to decompress.");
62+
63+
long decompressedLength = readUncompressedLength(compressedBuffer);
64+
65+
if (decompressedLength == 0L) {
66+
// shortcut for empty buffer
67+
compressedBuffer.close();
68+
return allocator.getEmpty();
69+
}
70+
71+
if (decompressedLength == CompressionUtil.NO_COMPRESSION_LENGTH) {
72+
// no compression
73+
return CompressionUtil.extractUncompressedBuffer(compressedBuffer);
74+
}
75+
76+
ArrowBuf decompressedBuffer = doDecompress(allocator, compressedBuffer);
77+
compressedBuffer.close();
78+
return decompressedBuffer;
79+
}
80+
81+
protected void writeUncompressedLength(ArrowBuf compressedBuffer, long uncompressedLength) {
82+
if (!MemoryUtil.LITTLE_ENDIAN) {
83+
uncompressedLength = Long.reverseBytes(uncompressedLength);
84+
}
85+
// first 8 bytes reserved for uncompressed length, according to the specification
86+
compressedBuffer.setLong(0, uncompressedLength);
87+
}
88+
89+
protected long readUncompressedLength(ArrowBuf compressedBuffer) {
90+
long decompressedLength = compressedBuffer.getLong(0);
91+
if (!MemoryUtil.LITTLE_ENDIAN) {
92+
decompressedLength = Long.reverseBytes(decompressedLength);
93+
}
94+
return decompressedLength;
95+
}
96+
97+
/**
98+
* The method that actually performs the data compression.
99+
* The layout of the returned compressed buffer is the compressed data,
100+
* plus 8 bytes reserved at the beginning of the buffer for the uncompressed data size.
101+
* <p>
102+
* Please note that this method is not responsible for releasing the uncompressed buffer.
103+
* </p>
104+
*/
105+
protected abstract ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBuffer);
106+
107+
/**
108+
* The method that actually performs the data decompression.
109+
* The layout of the compressed buffer is the compressed data,
110+
* plus 8 bytes at the beginning of the buffer storing the uncompressed data size.
111+
* <p>
112+
* Please note that this method is not responsible for releasing the compressed buffer.
113+
* </p>
114+
*/
115+
protected abstract ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBuffer);
116+
}

0 commit comments

Comments
 (0)