Skip to content

Commit 100bcb1

Browse files
committed
pulled IOUtils last updates from origin/dump
1 parent 95dd250 commit 100bcb1

File tree

1 file changed

+74
-3
lines changed

1 file changed

+74
-3
lines changed

core/src/main/scala/org/dbpedia/extraction/util/IOUtils.scala

Lines changed: 74 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,84 @@
11
package org.dbpedia.extraction.util
22

3-
import java.io.{InputStream,OutputStream}
3+
import org.apache.commons.compress.compressors.bzip2.{BZip2CompressorInputStream,BZip2CompressorOutputStream}
4+
import java.util.zip.{GZIPInputStream,GZIPOutputStream}
5+
import java.io.{InputStream,OutputStream,Reader,Writer,OutputStreamWriter,InputStreamReader}
6+
import scala.io.Codec
7+
import org.dbpedia.extraction.util.RichReader.wrapReader
8+
import java.nio.charset.Charset
49

10+
/**
11+
* TODO: modify the bzip code such that there are no run-time dependencies on commons-compress.
12+
* Users should be able to use .gz files without having commons-compress on the classpath.
13+
* Even better, look for several different bzip2 implementations on the classpath...
14+
*/
515
object IOUtils {
616

17+
/**
18+
* Map from file suffix (without "." dot) to output stream wrapper
19+
*/
20+
val zippers: Map[String, OutputStream => OutputStream] = Map(
  // each entry layers the matching compressor on top of the raw stream it is given
  "gz" -> ((raw: OutputStream) => new GZIPOutputStream(raw)),
  "bz2" -> ((raw: OutputStream) => new BZip2CompressorOutputStream(raw))
)
24+
25+
/**
26+
* Map from file suffix (without "." dot) to input stream wrapper
27+
*/
28+
val unzippers: Map[String, InputStream => InputStream] = Map(
  // each entry layers the matching decompressor on top of the raw stream it is given
  "gz" -> ((raw: InputStream) => new GZIPInputStream(raw)),
  // NOTE(review): the `true` flag is presumably commons-compress' decompressConcatenated
  // option - confirm against the BZip2CompressorInputStream API docs
  "bz2" -> ((raw: InputStream) => new BZip2CompressorInputStream(raw, true))
)
32+
33+
/**
34+
* use opener on file, wrap in un/zipper stream if necessary
35+
*/
36+
private def open[T](file: FileLike[_], opener: FileLike[_] => T, wrappers: Map[String, T => T]): T = {
  val name = file.name
  // Only the text after the last dot counts as a suffix. A name without any dot
  // (e.g. a file literally called "gz") has no suffix and must be opened unwrapped.
  val dot = name.lastIndexOf('.')
  val wrapper = if (dot < 0) None else wrappers.get(name.substring(dot + 1))
  val raw = opener(file)
  wrapper match {
    case None => raw
    case Some(wrap) =>
      try wrap(raw)
      catch {
        case e: Exception =>
          // Wrapping can fail (e.g. a decompressor rejecting a bad header). Close the
          // stream that was already opened so it does not leak, then report the
          // original failure; a secondary failure of close() is deliberately ignored.
          raw match {
            case c: java.io.Closeable => try c.close() catch { case _: Exception => () }
            case _ => ()
          }
          throw e
      }
  }
}
41+
42+
/**
43+
* open output stream, wrap in zipper stream if file suffix indicates compressed file.
44+
*/
45+
def outputStream(file: FileLike[_]): OutputStream = {
  // raw stream of the file; `open` adds a compressor when the suffix is listed in `zippers`
  val openRaw: FileLike[_] => OutputStream = target => target.outputStream()
  open(file, openRaw, zippers)
}
47+
48+
/**
49+
* open input stream, wrap in unzipper stream if file suffix indicates compressed file.
50+
*/
51+
def inputStream(file: FileLike[_]): InputStream = {
  // raw stream of the file; `open` adds a decompressor when the suffix is listed in `unzippers`
  val openRaw: FileLike[_] => InputStream = source => source.inputStream()
  open(file, openRaw, unzippers)
}
53+
54+
/**
55+
* open output stream, wrap in zipper stream if file suffix indicates compressed file,
56+
* wrap in writer.
57+
*/
58+
def writer(file: FileLike[_], charset: Charset = Codec.UTF8.charSet): Writer = {
  // possibly compressing byte stream first, character encoding on top of it
  val bytes = outputStream(file)
  new OutputStreamWriter(bytes, charset)
}
60+
61+
/**
62+
* open input stream, wrap in unzipper stream if file suffix indicates compressed file,
63+
* wrap in reader.
64+
*/
65+
def reader(file: FileLike[_], charset: Charset = Codec.UTF8.charSet): Reader = {
  // possibly decompressing byte stream first, character decoding on top of it
  val bytes = inputStream(file)
  new InputStreamReader(bytes, charset)
}
67+
68+
def readLines(file: FileLike[_])(proc: String => Unit): Unit = {
  // Opened through `reader` with its default charset; always closed, even if `proc` throws.
  val in = reader(file)
  // `foreach` comes from the implicit RichReader wrapper (wrapReader)
  try in.foreach(line => proc(line))
  finally in.close()
}
77+
778
/**
879
* Copy all bytes from input to output. Don't close any stream.
980
*/
10-
def copy(in : InputStream, out : OutputStream) : Unit = {
81+
def copy(in: InputStream, out: OutputStream) : Unit = {
1182
val buf = new Array[Byte](1 << 20) // 1 MB
1283
while (true)
1384
{
@@ -21,4 +92,4 @@ object IOUtils {
2192
}
2293
}
2394

24-
}
95+
}

0 commit comments

Comments
 (0)