-
Notifications
You must be signed in to change notification settings - Fork 507
Expand file tree
/
Copy pathRowCount.java
More file actions
96 lines (87 loc) · 3.29 KB
/
RowCount.java
File metadata and controls
96 lines (87 loc) · 3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.orc.tools;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import java.io.IOException;
/**
 * Given a set of paths, finds all of the "*.orc" files under them and prints
 * the number of rows in each file.
 *
 * <p>Each file that can be opened produces one line on standard output of the
 * form {@code <path> <rowCount>}. Files that fail to open are reported on
 * standard error, and the process exits with status 1 after all paths have
 * been visited if at least one file failed.
 *
 * <p>Options:
 * <ul>
 *   <li>{@code -i}, {@code --ignoreExtension}: count every file, not only
 *       those whose name ends with ".orc"</li>
 *   <li>{@code -h}, {@code --help}: print the usage message and return</li>
 * </ul>
 */
public class RowCount {

  /**
   * Runs the tool with an explicit configuration.
   *
   * @param conf configuration used to resolve the file system of each path
   *             and to build the ORC reader options
   * @param args command line: options followed by the root paths to scan
   * @throws Exception propagated from command-line parsing or from file
   *                   system access other than opening an individual file
   */
  public static void main(Configuration conf, String[] args) throws Exception {
    Options opts = createOptions();
    CommandLine cli = new DefaultParser().parse(opts, args);
    if (cli.hasOption('h')) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("count", opts);
      return;
    }

    boolean ignoreExtension = cli.hasOption("ignoreExtension");
    String[] files = cli.getArgs();
    // Number of files that could not be opened; drives the exit status.
    int bad = 0;
    for (String root : files) {
      Path rootPath = new Path(root);
      FileSystem fs = rootPath.getFileSystem(conf);
      // The second argument asks listFiles to descend into sub-directories.
      RemoteIterator<LocatedFileStatus> itr = fs.listFiles(rootPath, true);
      while (itr.hasNext()) {
        LocatedFileStatus status = itr.next();
        Path filename = status.getPath();
        boolean selected = status.isFile()
            && (ignoreExtension || filename.getName().endsWith(".orc"));
        if (!selected) {
          continue;
        }
        try (Reader reader = OrcFile.createReader(filename, OrcFile.readerOptions(conf))) {
          System.out.println(String.format("%s %d",
              filename.toString(), reader.getNumberOfRows()));
        } catch (IOException ioe) {
          // Keep going so one unreadable file does not hide the counts of the
          // others, but report why it failed instead of discarding the cause.
          bad += 1;
          System.err.println("Failed to read " + filename + ": " + ioe);
        }
      }
    }
    if (bad > 0) {
      System.exit(1);
    }
  }

  /**
   * Command-line entry point: builds a default configuration and delegates to
   * {@link #main(Configuration, String[])}.
   *
   * @param args command line: options followed by the root paths to scan
   * @throws Exception propagated from {@link #main(Configuration, String[])}
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // NOTE(review): on Java releases after 21 the local file system cache is
    // disabled unless the caller already set the key. The reason is not
    // visible in this file -- presumably a JDK compatibility workaround; confirm.
    if (Runtime.version().feature() > 21) {
      conf.setIfUnset("fs.file.impl.disable.cache", "true");
    }
    main(conf, args);
  }

  /**
   * Builds the set of command-line options understood by this tool.
   *
   * @return the options {@code -i/--ignoreExtension} and {@code -h/--help}
   */
  private static Options createOptions() {
    Options result = new Options();
    result.addOption(Option.builder("i")
        .longOpt("ignoreExtension")
        .desc("Ignore ORC file extension")
        .build());
    result.addOption(Option.builder("h")
        .longOpt("help")
        .desc("Print help message")
        .build());
    return result;
  }
}