11package org .dbpedia .extraction .util
22
3- import java .io .{InputStream ,OutputStream }
3+ import org .apache .commons .compress .compressors .bzip2 .{BZip2CompressorInputStream ,BZip2CompressorOutputStream }
4+ import java .util .zip .{GZIPInputStream ,GZIPOutputStream }
5+ import java .io .{InputStream ,OutputStream ,Reader ,Writer ,OutputStreamWriter ,InputStreamReader }
6+ import scala .io .Codec
7+ import org .dbpedia .extraction .util .RichReader .wrapReader
8+ import java .nio .charset .Charset
49
10+ /**
11+ * TODO: modify the bzip code such that there are no run-time dependencies on commons-compress.
12+ * Users should be able to use .gz files without having commons-compress on the classpath.
13+ * Even better, look for several different bzip2 implementations on the classpath...
14+ */
515object IOUtils {
616
17+ /**
18+ * Map from file suffix (without "." dot) to output stream wrapper
19+ */
val zippers = Map[String, OutputStream => OutputStream](
  // Keys are compared against the text after the last '.' of a file name, so they
  // must not carry any padding: a key such as " gz" would never match the suffix
  // "gz" and the stream would silently be left uncompressed.
  "gz" -> { (out: OutputStream) => new GZIPOutputStream(out) },
  "bz2" -> { (out: OutputStream) => new BZip2CompressorOutputStream(out) }
)
24+
25+ /**
26+ * Map from file suffix (without "." dot) to input stream wrapper
27+ */
val unzippers = Map[String, InputStream => InputStream](
  // Keys are compared against the text after the last '.' of a file name, so they
  // must not carry any padding: a key such as " gz" would never match the suffix
  // "gz" and compressed data would be handed to the caller as raw bytes.
  "gz" -> { (in: InputStream) => new GZIPInputStream(in) },
  // NOTE(review): the second constructor argument is presumably commons-compress'
  // `decompressConcatenated` flag (read multi-stream .bz2 files to the end) - confirm
  // against the library version in use.
  "bz2" -> { (in: InputStream) => new BZip2CompressorInputStream(in, true) }
)
32+
33+ /**
34+ * use opener on file, wrap in un/zipper stream if necessary
35+ */
private def open[T](file: FileLike[_], opener: FileLike[_] => T, wrappers: Map[String, T => T]): T = {
  val fileName = file.name
  // Everything after the last '.' is the lookup key. When the name contains no '.',
  // lastIndexOf yields -1, so the key is the entire name.
  val lastDot = fileName.lastIndexOf('.')
  val key = fileName.substring(lastDot + 1)
  // The wrapper is looked up before the file is opened; without a matching wrapper
  // the opened resource is returned unchanged.
  wrappers.get(key) match {
    case Some(wrap) => wrap(opener(file))
    case None => opener(file)
  }
}
41+
42+ /**
43+ * open output stream, wrap in zipper stream if file suffix indicates compressed file.
44+ */
def outputStream(file: FileLike[_]): OutputStream = {
  // Delegate to `open`: the raw stream comes from the file itself, and `zippers`
  // decides (by file suffix) whether a compressing stream is layered on top.
  val openRaw: FileLike[_] => OutputStream = target => target.outputStream()
  open[OutputStream](file, openRaw, zippers)
}
47+
48+ /**
49+ * open input stream, wrap in unzipper stream if file suffix indicates compressed file.
50+ */
def inputStream(file: FileLike[_]): InputStream = {
  // Delegate to `open`: the raw stream comes from the file itself, and `unzippers`
  // decides (by file suffix) whether a decompressing stream is layered on top.
  val openRaw: FileLike[_] => InputStream = source => source.inputStream()
  open[InputStream](file, openRaw, unzippers)
}
53+
54+ /**
55+ * open output stream, wrap in zipper stream if file suffix indicates compressed file,
56+ * wrap in writer.
57+ */
def writer(file: FileLike[_], charset: Charset = Codec.UTF8.charSet): Writer = {
  // `outputStream` handles the suffix-based compression; this method only adds
  // the character encoding layer (UTF-8 unless the caller passes another charset).
  val bytes: OutputStream = outputStream(file)
  new OutputStreamWriter(bytes, charset)
}
60+
61+ /**
62+ * open input stream, wrap in unzipper stream if file suffix indicates compressed file,
63+ * wrap in reader.
64+ */
def reader(file: FileLike[_], charset: Charset = Codec.UTF8.charSet): Reader = {
  // `inputStream` handles the suffix-based decompression; this method only adds
  // the character decoding layer (UTF-8 unless the caller passes another charset).
  val bytes: InputStream = inputStream(file)
  new InputStreamReader(bytes, charset)
}
67+
/**
 * Open the file through `reader` (default charset), hand each line to `proc`,
 * and close the reader afterwards - also when `proc` or the read fails.
 *
 * Line iteration comes from the implicit RichReader wrapper; how it splits lines
 * is defined there, not here.
 *
 * @param file file to read; opened via `reader`, so suffix-based decompression applies
 * @param proc callback invoked once per line, in order
 */
def readLines(file: FileLike[_])(proc: String => Unit): Unit = {
  val source = this.reader(file)
  try {
    // Explicit form of `for (line <- source)`, resolved through wrapReader.
    source.foreach { line => proc(line) }
  }
  finally {
    source.close()
  }
}
77+
778 /**
879 * Copy all bytes from input to output. Don't close any stream.
980 */
10- def copy (in : InputStream , out : OutputStream ) : Unit = {
81+ def copy (in : InputStream , out : OutputStream ) : Unit = {
1182 val buf = new Array [Byte ](1 << 20 ) // 1 MB
1283 while (true )
1384 {
@@ -21,4 +92,4 @@ object IOUtils {
2192 }
2293 }
2394
24- }
95+ }
0 commit comments