-
Notifications
You must be signed in to change notification settings - Fork 507
Expand file tree
/
Copy pathOrcTail.java
More file actions
224 lines (195 loc) · 7.02 KB
/
OrcTail.java
File metadata and controls
224 lines (195 loc) · 7.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.orc.impl;
import org.apache.orc.CompressionKind;
import org.apache.orc.OrcFile;
import org.apache.orc.OrcProto;
import org.apache.orc.OrcUtils;
import org.apache.orc.Reader;
import org.apache.orc.StripeInformation;
import org.apache.orc.StripeStatistics;
import org.apache.orc.TypeDescription;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
// TODO: Make OrcTail implement FileMetadata or Reader interface
/**
 * Immutable holder for the tail of an ORC file: the parsed
 * {@link OrcProto.FileTail} (postscript + footer), the serialized bytes it was
 * read from, and the schema derived from the footer's type list.
 *
 * @since 1.2.0
 */
public final class OrcTail {
  private static final Logger LOG = LoggerFactory.getLogger(OrcTail.class);

  // postscript + footer - Serialized in OrcSplit
  private final OrcProto.FileTail fileTail;
  // serialized representation of metadata, footer and postscript
  private final BufferChunk serializedTail;
  // schema converted from the footer's protobuf type list
  private final TypeDescription schema;
  // used to invalidate cache entries
  private final long fileModificationTime;
  // optional; only consulted by the deprecated getStripeStatistics()
  private final Reader reader;

  /**
   * Creates a tail with a file modification time of {@code -1} and no reader.
   *
   * @param fileTail the parsed file tail
   * @param serializedTail the serialized bytes of the tail
   * @throws IOException declared for failures propagated from the
   *     {@link OrcUtils} type validation/conversion calls
   */
  public OrcTail(OrcProto.FileTail fileTail,
                 ByteBuffer serializedTail) throws IOException {
    this(fileTail, serializedTail, -1);
  }

  /**
   * Creates a tail from a single buffer and no reader. The buffer is wrapped
   * in a {@link BufferChunk} constructed with the stripe statistics offset
   * computed from {@code fileTail}.
   *
   * @param fileTail the parsed file tail
   * @param serializedTail the serialized bytes of the tail
   * @param fileModificationTime modification time used to invalidate cache entries
   * @throws IOException declared for failures propagated from the
   *     {@link OrcUtils} type validation/conversion calls
   */
  public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail,
                 long fileModificationTime) throws IOException {
    this(fileTail,
        new BufferChunk(serializedTail, getStripeStatisticsOffset(fileTail)),
        fileModificationTime);
  }

  /**
   * Creates a tail from an existing chunk (list) and no reader.
   *
   * @param fileTail the parsed file tail
   * @param serializedTail the first chunk of the serialized tail
   * @param fileModificationTime modification time used to invalidate cache entries
   * @throws IOException declared for failures propagated from the
   *     {@link OrcUtils} type validation/conversion calls
   */
  public OrcTail(OrcProto.FileTail fileTail, BufferChunk serializedTail,
                 long fileModificationTime) throws IOException {
    this(fileTail, serializedTail, fileModificationTime, null);
  }

  /**
   * Creates a tail, validating the footer's type tree and converting it to a
   * {@link TypeDescription}.
   *
   * @param fileTail the parsed file tail
   * @param serializedTail the first chunk of the serialized tail
   * @param fileModificationTime modification time used to invalidate cache entries
   * @param reader reader used by {@link #getStripeStatistics()}; may be {@code null}
   * @throws IOException declared for failures propagated from the
   *     {@link OrcUtils} type validation/conversion calls
   */
  public OrcTail(OrcProto.FileTail fileTail, BufferChunk serializedTail,
                 long fileModificationTime, Reader reader) throws IOException {
    this.fileTail = fileTail;
    this.serializedTail = serializedTail;
    this.fileModificationTime = fileModificationTime;
    // The fields above must be assigned first: getTypes() reads this.fileTail.
    List<OrcProto.Type> types = getTypes();
    OrcUtils.isValidTypeTree(types, 0);
    this.schema = OrcUtils.convertTypeFromProtobuf(types, 0);
    this.reader = reader;
  }

  /**
   * Gets the serialized tail as a single buffer.
   *
   * <p>When the tail consists of one chunk, that chunk's own buffer is
   * returned (not a copy). Otherwise the chunks are concatenated, in list
   * order, into a newly allocated heap buffer.
   *
   * @return a buffer positioned at the start of the serialized tail
   */
  public ByteBuffer getSerializedTail() {
    if (serializedTail.next == null) {
      return serializedTail.getData();
    }
    // make a single buffer...
    int len = 0;
    for (BufferChunk chunk = serializedTail;
         chunk != null;
         chunk = (BufferChunk) chunk.next) {
      len += chunk.getLength();
    }
    ByteBuffer result = ByteBuffer.allocate(len);
    for (BufferChunk chunk = serializedTail;
         chunk != null;
         chunk = (BufferChunk) chunk.next) {
      // Bulk-copy through a duplicate instead of array()/arrayOffset():
      // array() throws for direct or read-only buffers, while put(ByteBuffer)
      // works for every buffer kind. The duplicate keeps the chunk's own
      // position and limit untouched.
      result.put(chunk.getData().duplicate());
    }
    result.flip();
    return result;
  }

  /**
   * Gets the buffer chunks that correspond to the stripe statistics,
   * file tail, and post script.
   * @return the shared buffers with the contents of the file tail
   */
  public BufferChunk getTailBuffer() {
    return serializedTail;
  }

  /**
   * Gets the modification time supplied at construction.
   * @return the file modification time, or -1 when the two-argument
   *     constructor was used
   */
  public long getFileModificationTime() {
    return fileModificationTime;
  }

  /**
   * Gets the footer of the file tail.
   * @return the protobuf footer
   */
  public OrcProto.Footer getFooter() {
    return fileTail.getFooter();
  }

  /**
   * Gets the postscript of the file tail.
   * @return the protobuf postscript
   */
  public OrcProto.PostScript getPostScript() {
    return fileTail.getPostscript();
  }

  /**
   * Resolves the writer version from the writer implementation recorded in
   * the footer and the writer version number recorded in the postscript.
   * @return the writer version
   */
  public OrcFile.WriterVersion getWriterVersion() {
    OrcProto.PostScript ps = fileTail.getPostscript();
    OrcProto.Footer footer = fileTail.getFooter();
    OrcFile.WriterImplementation writer =
        OrcFile.WriterImplementation.from(footer.getWriter());
    return OrcFile.WriterVersion.from(writer, ps.getWriterVersion());
  }

  /**
   * Gets the stripes listed in the footer.
   * @return the footer's stripes converted to {@link StripeInformation}
   */
  public List<StripeInformation> getStripes() {
    return OrcUtils.convertProtoStripesToStripes(getFooter().getStripesList());
  }

  /**
   * Gets the compression kind recorded in the postscript. The protobuf enum
   * is mapped to {@link CompressionKind} by constant name.
   * @return the compression kind
   */
  public CompressionKind getCompressionKind() {
    return CompressionKind.valueOf(fileTail.getPostscript().getCompression().name());
  }

  /**
   * Gets the compression block size, as computed by
   * {@code ReaderImpl.getCompressionBlockSize} from the postscript.
   * @return the compression buffer size
   */
  public int getCompressionBufferSize() {
    OrcProto.PostScript postScript = fileTail.getPostscript();
    return ReaderImpl.getCompressionBlockSize(postScript);
  }

  /**
   * Gets the metadata length recorded in the postscript.
   * @return the metadata length, narrowed to {@code int} by a plain cast
   */
  public int getMetadataSize() {
    return (int) getPostScript().getMetadataLength();
  }

  /**
   * Gets the type list of the footer.
   * @return the protobuf types
   */
  public List<OrcProto.Type> getTypes() {
    return getFooter().getTypesList();
  }

  /**
   * Gets the schema that was converted from the footer's types at construction.
   * @return the file schema
   */
  public TypeDescription getSchema() {
    return schema;
  }

  /**
   * Gets the parsed file tail this object wraps.
   * @return the protobuf file tail
   */
  public OrcProto.FileTail getFileTail() {
    return fileTail;
  }

  /**
   * Computes the metadata offset as the file length minus one byte, the
   * postscript length, the footer length and the metadata length.
   * NOTE(review): the single byte is presumably the trailing postscript-length
   * byte of the ORC file layout - confirm against the format specification.
   */
  static long getMetadataOffset(OrcProto.FileTail tail) {
    OrcProto.PostScript ps = tail.getPostscript();
    return tail.getFileLength()
        - 1
        - tail.getPostscriptLength()
        - ps.getFooterLength()
        - ps.getMetadataLength();
  }

  /**
   * Computes the stripe statistics offset: the metadata offset minus the
   * stripe statistics length recorded in the postscript.
   */
  static long getStripeStatisticsOffset(OrcProto.FileTail tail) {
    OrcProto.PostScript ps = tail.getPostscript();
    return getMetadataOffset(tail) - ps.getStripeStatisticsLength();
  }

  /**
   * Get the file offset of the metadata section of footer.
   * @return the byte offset of the start of the metadata
   */
  public long getMetadataOffset() {
    return getMetadataOffset(fileTail);
  }

  /**
   * Get the file offset of the stripe statistics.
   * @return the byte offset of the start of the stripe statistics
   */
  public long getStripeStatisticsOffset() {
    return getStripeStatisticsOffset(fileTail);
  }

  /**
   * Get the position of the end of the file.
   * @return the byte length of the file
   */
  public long getFileLength() {
    return fileTail.getFileLength();
  }

  /**
   * Builds a copy of the file tail whose footer has its statistics cleared.
   * The wrapped file tail is not modified.
   * @return a new file tail without footer statistics
   */
  public OrcProto.FileTail getMinimalFileTail() {
    OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder(fileTail);
    OrcProto.Footer.Builder footerBuilder = OrcProto.Footer.newBuilder(fileTail.getFooter());
    footerBuilder.clearStatistics();
    fileTailBuilder.setFooter(footerBuilder.build());
    return fileTailBuilder.build();
  }

  /**
   * Get the stripe statistics from the file tail.
   * This code is for compatibility with ORC 1.5.
   * When no {@link Reader} was given to the constructor, a warning is logged
   * and a new empty list is returned.
   * @return the stripe statistics
   * @throws IOException if the reader fails to provide the statistics
   * @deprecated the user should use Reader.getStripeStatistics instead.
   */
  @Deprecated
  public List<StripeStatistics> getStripeStatistics() throws IOException {
    if (reader == null) {
      LOG.warn("Please use Reader.getStripeStatistics or give `Reader` to OrcTail constructor.");
      return new ArrayList<>();
    } else {
      return reader.getStripeStatistics();
    }
  }
}