Skip to content

Commit 728073b

Browse files
authored
Merge pull request #718 from zweibieren/master
corrected UncompressInputStream
2 parents 313e4de + 4941102 commit 728073b

File tree

5 files changed

+341
-70
lines changed

5 files changed

+341
-70
lines changed

biojava-core/src/main/java/org/biojava/nbio/core/util/UncompressInputStream.java

Lines changed: 100 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,24 @@
7575
* @author Ronald Tschalar
7676
* @author Unidata Program Center
7777
* @author Richard Holland - making LZW_MAGIC package-visible.
78+
*
79+
* @version 0.3-5 2008/01/19
80+
* @author Fred Hansen (zweibieren@yahoo.com)
81+
* Fixed available() and the EOF condition for mainloop.
82+
* Also added some comments.
83+
*
84+
* @version 1.0 2018/01/08
85+
* @author Fred Hansen (zweibieren@yahoo.com)
86+
* added uncompress(InputStream,OutputStream)
87+
* and called it from main(String[])
88+
* and uncompress(String, FileOutputStream)
89+
* normalize indentation
90+
* rewrite skip method
91+
* amend logging code in uncompress(String, FileOutputStream)
7892
*/
7993
public class UncompressInputStream extends FilterInputStream {
80-
81-
private final static Logger logger = LoggerFactory.getLogger(UncompressInputStream.class);
94+
private final static Logger logger
95+
= LoggerFactory.getLogger(UncompressInputStream.class);
8296

8397
/**
8498
* @param is the input stream to decompress
@@ -90,10 +104,9 @@ public UncompressInputStream(InputStream is) throws IOException {
90104
}
91105

92106

93-
byte[] one = new byte[1];
94-
95107
@Override
96-
public synchronized int read() throws IOException {
108+
public synchronized int read() throws IOException {
109+
byte[] one = new byte[1];
97110
int b = read(one, 0, 1);
98111
if (b == 1)
99112
return (one[0] & 0xff);
@@ -108,7 +121,7 @@ public synchronized int read() throws IOException {
108121

109122
private int[] tab_prefix;
110123
private byte[] tab_suffix;
111-
private int[] zeros = new int[256];
124+
final private int[] zeros = new int[256];
112125
private byte[] stack;
113126

114127
// various state
@@ -123,20 +136,27 @@ public synchronized int read() throws IOException {
123136
private int stackp;
124137
private int free_ent;
125138

126-
// input buffer
127-
private byte[] data = new byte[10000];
128-
private int bit_pos = 0, end = 0, got = 0;
139+
/* input buffer
140+
The input stream must be considered in chunks.
141+
Each chunk is n_bits bytes long, i.e. eight times the current code length in bits.
142+
Thus each chunk contains eight codes, which are NOT aligned on byte boundaries.
143+
*/
144+
final private byte[] data = new byte[10000];
145+
private int
146+
bit_pos = 0, // current bitwise location in bitstream
147+
end = 0, // index of next byte to fill in data
148+
got = 0; // number of bytes gotten by most recent read()
129149
private boolean eof = false;
130150
private static final int EXTRA = 64;
131151

132152

133153
@Override
134-
public synchronized int read(byte[] buf, int off, int len)
154+
public synchronized int read(byte[] buf, int off, int len)
135155
throws IOException {
136156
if (eof) return -1;
137157
int start = off;
138158

139-
/* Using local copies of various variables speeds things up by as
159+
/* Using local copies of various variables speeds things up by as
140160
* much as 30% !
141161
*/
142162
int[] l_tab_prefix = tab_prefix;
@@ -153,9 +173,7 @@ public synchronized int read(byte[] buf, int off, int len)
153173
byte[] l_data = data;
154174
int l_bit_pos = bit_pos;
155175

156-
157-
// empty stack if stuff still left
158-
176+
// empty stack if stuff still left
159177
int s_size = l_stack.length - l_stackp;
160178
if (s_size > 0) {
161179
int num = (s_size >= len) ? len : s_size;
@@ -170,17 +188,15 @@ public synchronized int read(byte[] buf, int off, int len)
170188
return off - start;
171189
}
172190

173-
174-
// loop, filling local buffer until enough data has been decompressed
175-
191+
// loop, filling local buffer until enough data has been decompressed
176192
main_loop: do {
177193
if (end < EXTRA) fill();
178194

179-
int bit_in = (got > 0) ? (end - end % l_n_bits) << 3 :
180-
(end << 3) - (l_n_bits - 1);
195+
int bit_end = (got > 0)
196+
? (end - end % l_n_bits) << 3 // set to a "chunk" boundary
197+
: (end << 3) - (l_n_bits - 1); // no more data, set to last code
181198

182-
while (l_bit_pos < bit_in) {
183-
// handle 1-byte reads correctly
199+
while (l_bit_pos < bit_end) { // handle 1-byte reads correctly
184200
if (len == 0) {
185201
n_bits = l_n_bits;
186202
maxcode = l_maxcode;
@@ -326,7 +342,10 @@ public synchronized int read(byte[] buf, int off, int len)
326342
}
327343

328344
l_bit_pos = resetbuf(l_bit_pos);
329-
} while (got > 0);
345+
} while
346+
// old code: (got>0) fails if code width expands near EOF
347+
(got > 0 // usually true
348+
|| l_bit_pos < (end << 3) - (l_n_bits - 1)); // last few bytes
330349

331350
n_bits = l_n_bits;
332351
maxcode = l_maxcode;
@@ -346,37 +365,35 @@ public synchronized int read(byte[] buf, int off, int len)
346365
* Moves the unread data in the buffer to the beginning and resets
347366
* the pointers.
348367
*/
349-
private final int resetbuf(int bit_pos) {
368+
private int resetbuf(int bit_pos) {
350369
int pos = bit_pos >> 3;
351370
System.arraycopy(data, pos, data, 0, end - pos);
352371
end -= pos;
353372
return 0;
354373
}
355374

356375

357-
private final void fill() throws IOException {
376+
private void fill() throws IOException {
358377
got = in.read(data, end, data.length - 1 - end);
359378
if (got > 0) end += got;
360379
}
361380

362381

363382
@Override
364-
public synchronized long skip(long num) throws IOException {
365-
byte[] tmp = new byte[(int) num];
366-
int got = read(tmp, 0, (int) num);
367-
368-
if (got > 0)
369-
return got;
370-
else
371-
return 0L;
383+
public synchronized long skip(long num) throws IOException {
384+
return Math.max(0, read(new byte[(int) num]));
372385
}
373386

374387

375388
@Override
376-
public synchronized int available() throws IOException {
389+
public synchronized int available() throws IOException {
377390
if (eof) return 0;
378-
379-
return in.available();
391+
// the old code was: return in.available();
392+
// it fails because this.read() can return bytes
393+
// even after in.available() is zero
394+
// -- zweibieren
395+
int avail = in.available();
396+
return (avail == 0) ? 1 : avail;
380397
}
381398

382399

@@ -389,8 +406,7 @@ public synchronized int available() throws IOException {
389406
private static final int HDR_BLOCK_MODE = 0x80;
390407

391408
private void parse_header() throws IOException {
392-
// read in and check magic number
393-
409+
// read in and check magic number
394410
int t = in.read();
395411
if (t < 0) throw new EOFException("Failed to read magic number");
396412
int magic = (t & 0xff) << 8;
@@ -402,9 +418,7 @@ private void parse_header() throws IOException {
402418
"magic number 0x" +
403419
Integer.toHexString(magic) + ")");
404420

405-
406-
// read in header byte
407-
421+
// read in header byte
408422
int header = in.read();
409423
if (header < 0) throw new EOFException("Failed to read header");
410424

@@ -425,9 +439,7 @@ private void parse_header() throws IOException {
425439
logger.debug("block mode: {}", block_mode);
426440
logger.debug("max bits: {}", maxbits);
427441

428-
429-
// initialize stuff
430-
442+
// initialize stuff
431443
maxmaxcode = 1 << maxbits;
432444
n_bits = INIT_BITS;
433445
maxcode = (1 << n_bits) - 1;
@@ -451,59 +463,77 @@ private void parse_header() throws IOException {
451463
* @return false
452464
*/
453465
@Override
454-
public boolean markSupported() {
466+
public boolean markSupported() {
455467
return false;
456468
}
457469

458-
static public void uncompress( String fileInName, FileOutputStream out) throws IOException {
470+
/**
471+
* Read a named file and uncompress it.
472+
* @param fileInName Name of compressed file.
473+
* @param out A destination for the result. It is closed after data is sent.
474+
* @return number of bytes sent to the output stream
475+
* @throws IOException for any error
476+
*/
477+
public static long uncompress(String fileInName, FileOutputStream out)
478+
throws IOException {
459479
long start = System.currentTimeMillis();
460-
461-
InputStream in = new UncompressInputStream( new FileInputStream(fileInName));
462-
463-
// int total = 0;
464-
byte[] buffer = new byte[100000];
465-
while (true) {
466-
int bytesRead = in.read(buffer);
467-
if (bytesRead == -1) break;
468-
out.write(buffer, 0, bytesRead);
469-
// total += bytesRead;
480+
long total;
481+
try (InputStream fin = new FileInputStream(fileInName)) {
482+
total = uncompress(fin, out);
470483
}
471-
in.close();
472484
out.close();
473485

474486
if (debugTiming) {
475487
long end = System.currentTimeMillis();
476-
// logger.debug("Decompressed " + total + " bytes");
477-
logger.warn("Time: {} seconds", (end - start) / 1000);
488+
logger.info("Decompressed {} bytes", total);
489+
UncompressInputStream.logger.info("Time: {} seconds", (end - start) / 1000);
478490
}
491+
return total;
479492
}
480493

494+
/**
495+
* Read an input stream and uncompress it to an output stream.
496+
* @param in the incoming InputStream. It is NOT closed.
497+
* @param out the destination OutputStream. It is NOT closed.
498+
* @return number of bytes sent to the output stream
499+
* @throws IOException for any error
500+
*/
501+
public static long uncompress(InputStream in, OutputStream out)
502+
throws IOException {
503+
UncompressInputStream ucis = new UncompressInputStream(in);
504+
long total = 0;
505+
byte[] buffer = new byte[100000];
506+
while (true) {
507+
int bytesRead = ucis.read(buffer);
508+
if (bytesRead == -1) break;
509+
out.write(buffer, 0, bytesRead);
510+
total += bytesRead;
511+
}
512+
return total;
513+
}
481514

482515
private static final boolean debugTiming = false;
483516

517+
/**
518+
* Reads a file, uncompresses it, and sends the result to stdout.
519+
* Also logs trivial statistics (byte count and elapsed time) via the class logger.
520+
* @param args An array with one String element, the name of the file to read.
521+
* @throws IOException for any failure
522+
*/
484523
public static void main(String[] args) throws Exception {
485524
if (args.length != 1) {
486525
logger.info("Usage: UncompressInputStream <file>");
487526
System.exit(1);
488527
}
489-
490-
InputStream in =
491-
new UncompressInputStream(new FileInputStream(args[0]));
492-
493-
byte[] buf = new byte[100000];
494-
int tot = 0;
495528
long beg = System.currentTimeMillis();
496529

497-
while (true) {
498-
int got = in.read(buf);
499-
if (got < 0) break;
500-
System.out.write(buf, 0, got);
501-
tot += got;
530+
long tot;
531+
try (InputStream in = new FileInputStream(args[0])) {
532+
tot = uncompress(in, System.out);
502533
}
503534

504535
long end = System.currentTimeMillis();
505536
logger.info("Decompressed {} bytes", tot);
506537
logger.info("Time: {} seconds", (end - beg) / 1000);
507-
in.close();
508538
}
509539
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package org.biojava.nbio.core.util;
2+
3+
import java.io.BufferedInputStream;
4+
import org.junit.Test;
5+
import static org.junit.Assert.*;
6+
7+
import java.io.ByteArrayOutputStream;
8+
import java.io.InputStream;
9+
import org.junit.Assert;
10+
11+
public class TestUncompressInputStream {
12+
13+
/**
14+
* The file compress_test.txt.lzc is the output of:
15+
* <code>
16+
* cat compress_test.txt | compress > compress_test.txt.lzc
17+
* </code>
18+
* The original compress_test.txt contains text {@value #TEXT_IN_FILE}
19+
*/
20+
private static final String TEST_FILE = "org/biojava/nbio/core/util/compress_test.txt.lzc";
21+
private static final String TEXT_IN_FILE = "Test of biojava uncompress.\n";
22+
23+
private static final String BIGGER_TEST_FILE = "org/biojava/nbio/core/util/build-copy.xml.Z";
24+
private static final String ORIG_OF_BIGGER_TEST_FILE = "org/biojava/nbio/core/util/build.xml";
25+
26+
@Test
27+
public void testUncompression() throws Exception {
28+
29+
InputStream is = this.getClass().getClassLoader().getResourceAsStream(TEST_FILE);
30+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
31+
UncompressInputStream.uncompress(is, baos);
32+
String decompressedText = baos.toString();
33+
34+
assertEquals(TEXT_IN_FILE, decompressedText);
35+
36+
is = this.getClass().getClassLoader().getResourceAsStream(BIGGER_TEST_FILE);
37+
baos = new ByteArrayOutputStream();
38+
UncompressInputStream.uncompress(is, baos);
39+
40+
ByteArrayOutputStream obaos = new ByteArrayOutputStream();
41+
try (BufferedInputStream oin = new BufferedInputStream(
42+
this.getClass().getClassLoader()
43+
.getResourceAsStream(ORIG_OF_BIGGER_TEST_FILE));) {
44+
byte[] buf = new byte[100000];
45+
int len;
46+
while ((len = oin.read(buf)) >= 0)
47+
obaos.write(buf, 0, len);
48+
}
49+
50+
Assert.assertArrayEquals(baos.toByteArray(), obaos.toByteArray());
51+
}
52+
}
Binary file not shown.

0 commit comments

Comments
 (0)