Skip to content

Commit b01bcf2

Browse files
lidavidm and pitrou committed
ARROW-11772: [C++] Provide reentrant IPC file reader
This provides an async-reentrant generator of record batches from an IPC file reader, intended to support the Datasets API once it becomes async itself. IPC messages are read on an I/O thread pool, then decoded on the CPU thread pool. All dictionaries must be read at the start; after that, record batches can be read independently.

Closes apache#9656 from lidavidm/arrow-11772

Lead-authored-by: David Li <li.davidm96@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: David Li <li.davidm96@gmail.com>
1 parent c5a9f55 commit b01bcf2

13 files changed

Lines changed: 801 additions & 256 deletions

File tree

cpp/src/arrow/buffer.cc

Lines changed: 0 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
#include <cstdint>
2222
#include <utility>
2323

24-
#include "arrow/memory_pool.h"
2524
#include "arrow/result.h"
2625
#include "arrow/status.h"
2726
#include "arrow/util/bit_util.h"
@@ -171,112 +170,6 @@ MutableBuffer::MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_
171170
parent_ = parent;
172171
}
173172

174-
// -----------------------------------------------------------------------
175-
// Pool buffer and allocation
176-
177-
/// A Buffer whose lifetime is tied to a particular MemoryPool
178-
class PoolBuffer final : public ResizableBuffer {
179-
public:
180-
explicit PoolBuffer(std::shared_ptr<MemoryManager> mm, MemoryPool* pool)
181-
: ResizableBuffer(nullptr, 0, std::move(mm)), pool_(pool) {}
182-
183-
~PoolBuffer() override {
184-
uint8_t* ptr = mutable_data();
185-
if (ptr) {
186-
pool_->Free(ptr, capacity_);
187-
}
188-
}
189-
190-
Status Reserve(const int64_t capacity) override {
191-
if (capacity < 0) {
192-
return Status::Invalid("Negative buffer capacity: ", capacity);
193-
}
194-
uint8_t* ptr = mutable_data();
195-
if (!ptr || capacity > capacity_) {
196-
int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(capacity);
197-
if (ptr) {
198-
RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &ptr));
199-
} else {
200-
RETURN_NOT_OK(pool_->Allocate(new_capacity, &ptr));
201-
}
202-
data_ = ptr;
203-
capacity_ = new_capacity;
204-
}
205-
return Status::OK();
206-
}
207-
208-
Status Resize(const int64_t new_size, bool shrink_to_fit = true) override {
209-
if (ARROW_PREDICT_FALSE(new_size < 0)) {
210-
return Status::Invalid("Negative buffer resize: ", new_size);
211-
}
212-
uint8_t* ptr = mutable_data();
213-
if (ptr && shrink_to_fit && new_size <= size_) {
214-
// Buffer is non-null and is not growing, so shrink to the requested size without
215-
// excess space.
216-
int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(new_size);
217-
if (capacity_ != new_capacity) {
218-
// Buffer hasn't got yet the requested size.
219-
RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &ptr));
220-
data_ = ptr;
221-
capacity_ = new_capacity;
222-
}
223-
} else {
224-
RETURN_NOT_OK(Reserve(new_size));
225-
}
226-
size_ = new_size;
227-
228-
return Status::OK();
229-
}
230-
231-
static std::shared_ptr<PoolBuffer> MakeShared(MemoryPool* pool) {
232-
std::shared_ptr<MemoryManager> mm;
233-
if (pool == nullptr) {
234-
pool = default_memory_pool();
235-
mm = default_cpu_memory_manager();
236-
} else {
237-
mm = CPUDevice::memory_manager(pool);
238-
}
239-
return std::make_shared<PoolBuffer>(std::move(mm), pool);
240-
}
241-
242-
static std::unique_ptr<PoolBuffer> MakeUnique(MemoryPool* pool) {
243-
std::shared_ptr<MemoryManager> mm;
244-
if (pool == nullptr) {
245-
pool = default_memory_pool();
246-
mm = default_cpu_memory_manager();
247-
} else {
248-
mm = CPUDevice::memory_manager(pool);
249-
}
250-
return std::unique_ptr<PoolBuffer>(new PoolBuffer(std::move(mm), pool));
251-
}
252-
253-
private:
254-
MemoryPool* pool_;
255-
};
256-
257-
namespace {
258-
// A utility that does most of the work of the `AllocateBuffer` and
259-
// `AllocateResizableBuffer` methods. The argument `buffer` should be a smart pointer to
260-
// a PoolBuffer.
261-
template <typename BufferPtr, typename PoolBufferPtr>
262-
inline Result<BufferPtr> ResizePoolBuffer(PoolBufferPtr&& buffer, const int64_t size) {
263-
RETURN_NOT_OK(buffer->Resize(size));
264-
buffer->ZeroPadding();
265-
return std::move(buffer);
266-
}
267-
268-
} // namespace
269-
270-
Result<std::unique_ptr<Buffer>> AllocateBuffer(const int64_t size, MemoryPool* pool) {
271-
return ResizePoolBuffer<std::unique_ptr<Buffer>>(PoolBuffer::MakeUnique(pool), size);
272-
}
273-
274-
Result<std::unique_ptr<ResizableBuffer>> AllocateResizableBuffer(const int64_t size,
275-
MemoryPool* pool) {
276-
return ResizePoolBuffer<std::unique_ptr<ResizableBuffer>>(PoolBuffer::MakeUnique(pool),
277-
size);
278-
}
279-
280173
Result<std::shared_ptr<Buffer>> AllocateBitmap(int64_t length, MemoryPool* pool) {
281174
ARROW_ASSIGN_OR_RAISE(auto buf, AllocateBuffer(BitUtil::BytesForBits(length), pool));
282175
// Zero out any trailing bits

cpp/src/arrow/dataset/file_ipc.cc

Lines changed: 80 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,21 @@ static inline Result<std::shared_ptr<ipc::RecordBatchFileReader>> OpenReader(
5959
return reader;
6060
}
6161

62+
static inline Future<std::shared_ptr<ipc::RecordBatchFileReader>> OpenReaderAsync(
63+
const FileSource& source,
64+
const ipc::IpcReadOptions& options = default_read_options()) {
65+
ARROW_ASSIGN_OR_RAISE(auto input, source.Open());
66+
auto path = source.path();
67+
return ipc::RecordBatchFileReader::OpenAsync(std::move(input), options)
68+
.Then([](const std::shared_ptr<ipc::RecordBatchFileReader>& reader)
69+
-> Result<std::shared_ptr<ipc::RecordBatchFileReader>> { return reader; },
70+
[path](const Status& status)
71+
-> Result<std::shared_ptr<ipc::RecordBatchFileReader>> {
72+
return status.WithMessage("Could not open IPC input source '", path,
73+
"': ", status.message());
74+
});
75+
}
76+
6277
static inline Result<std::vector<int>> GetIncludedFields(
6378
const Schema& schema, const std::vector<std::string>& materialized_fields) {
6479
std::vector<int> included_fields;
@@ -73,6 +88,26 @@ static inline Result<std::vector<int>> GetIncludedFields(
7388
return included_fields;
7489
}
7590

91+
static inline Result<ipc::IpcReadOptions> GetReadOptions(
92+
const Schema& schema, const FileFormat& format, const ScanOptions& scan_options) {
93+
ARROW_ASSIGN_OR_RAISE(
94+
auto ipc_scan_options,
95+
GetFragmentScanOptions<IpcFragmentScanOptions>(
96+
kIpcTypeName, &scan_options, format.default_fragment_scan_options));
97+
auto options =
98+
ipc_scan_options->options ? *ipc_scan_options->options : default_read_options();
99+
options.memory_pool = scan_options.pool;
100+
if (!options.included_fields.empty()) {
101+
// Cannot set them here
102+
ARROW_LOG(WARNING) << "IpcFragmentScanOptions.options->included_fields was set "
103+
"but will be ignored; included_fields are derived from "
104+
"fields referenced by the scan";
105+
}
106+
ARROW_ASSIGN_OR_RAISE(options.included_fields,
107+
GetIncludedFields(schema, scan_options.MaterializedFields()));
108+
return options;
109+
}
110+
76111
/// \brief A ScanTask backed by an Ipc file.
77112
class IpcScanTask : public ScanTask {
78113
public:
@@ -83,28 +118,11 @@ class IpcScanTask : public ScanTask {
83118
Result<RecordBatchIterator> Execute() override {
84119
struct Impl {
85120
static Result<RecordBatchIterator> Make(const FileSource& source,
86-
FileFormat* format,
87-
const ScanOptions* scan_options) {
121+
const FileFormat& format,
122+
const ScanOptions& scan_options) {
88123
ARROW_ASSIGN_OR_RAISE(auto reader, OpenReader(source));
89-
90-
ARROW_ASSIGN_OR_RAISE(
91-
auto ipc_scan_options,
92-
GetFragmentScanOptions<IpcFragmentScanOptions>(
93-
kIpcTypeName, scan_options, format->default_fragment_scan_options));
94-
auto options = ipc_scan_options->options ? *ipc_scan_options->options
95-
: default_read_options();
96-
options.memory_pool = scan_options->pool;
97-
options.use_threads = false;
98-
if (!options.included_fields.empty()) {
99-
// Cannot set them here
100-
ARROW_LOG(WARNING) << "IpcFragmentScanOptions.options->included_fields was set "
101-
"but will be ignored; included_fields are derived from "
102-
"fields referenced by the scan";
103-
}
104-
ARROW_ASSIGN_OR_RAISE(
105-
options.included_fields,
106-
GetIncludedFields(*reader->schema(), scan_options->MaterializedFields()));
107-
124+
ARROW_ASSIGN_OR_RAISE(auto options,
125+
GetReadOptions(*reader->schema(), format, scan_options));
108126
ARROW_ASSIGN_OR_RAISE(reader, OpenReader(source, options));
109127
return RecordBatchIterator(Impl{std::move(reader), 0});
110128
}
@@ -121,9 +139,9 @@ class IpcScanTask : public ScanTask {
121139
int i_;
122140
};
123141

124-
return Impl::Make(
125-
source_, internal::checked_pointer_cast<FileFragment>(fragment_)->format().get(),
126-
options_.get());
142+
return Impl::Make(source_,
143+
*internal::checked_pointer_cast<FileFragment>(fragment_)->format(),
144+
*options_);
127145
}
128146

129147
private:
@@ -173,6 +191,44 @@ Result<ScanTaskIterator> IpcFileFormat::ScanFile(
173191
return IpcScanTaskIterator::Make(options, fragment);
174192
}
175193

194+
Result<RecordBatchGenerator> IpcFileFormat::ScanBatchesAsync(
195+
const std::shared_ptr<ScanOptions>& options,
196+
const std::shared_ptr<FileFragment>& file) const {
197+
auto self = shared_from_this();
198+
auto source = file->source();
199+
auto open_reader = OpenReaderAsync(source);
200+
auto reopen_reader = [self, options,
201+
source](std::shared_ptr<ipc::RecordBatchFileReader> reader)
202+
-> Future<std::shared_ptr<ipc::RecordBatchFileReader>> {
203+
ARROW_ASSIGN_OR_RAISE(auto options,
204+
GetReadOptions(*reader->schema(), *self, *options));
205+
return OpenReader(source, options);
206+
};
207+
auto readahead_level = options->batch_readahead;
208+
auto default_fragment_scan_options = this->default_fragment_scan_options;
209+
auto open_generator = [=](const std::shared_ptr<ipc::RecordBatchFileReader>& reader)
210+
-> Result<RecordBatchGenerator> {
211+
ARROW_ASSIGN_OR_RAISE(
212+
auto ipc_scan_options,
213+
GetFragmentScanOptions<IpcFragmentScanOptions>(kIpcTypeName, options.get(),
214+
default_fragment_scan_options));
215+
216+
RecordBatchGenerator generator;
217+
if (ipc_scan_options->cache_options) {
218+
// Transferring helps performance when coalescing
219+
ARROW_ASSIGN_OR_RAISE(
220+
generator, reader->GetRecordBatchGenerator(
221+
/*coalesce=*/true, options->io_context,
222+
*ipc_scan_options->cache_options, internal::GetCpuThreadPool()));
223+
} else {
224+
ARROW_ASSIGN_OR_RAISE(generator, reader->GetRecordBatchGenerator(
225+
/*coalesce=*/false, options->io_context));
226+
}
227+
return MakeReadaheadGenerator(std::move(generator), readahead_level);
228+
};
229+
return MakeFromFuture(open_reader.Then(reopen_reader).Then(open_generator));
230+
}
231+
176232
Future<util::optional<int64_t>> IpcFileFormat::CountRows(
177233
const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
178234
std::shared_ptr<ScanOptions> options) {

cpp/src/arrow/dataset/file_ipc.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "arrow/dataset/file_base.h"
2626
#include "arrow/dataset/type_fwd.h"
2727
#include "arrow/dataset/visibility.h"
28+
#include "arrow/io/type_fwd.h"
2829
#include "arrow/ipc/type_fwd.h"
2930
#include "arrow/result.h"
3031

@@ -56,6 +57,10 @@ class ARROW_DS_EXPORT IpcFileFormat : public FileFormat {
5657
const std::shared_ptr<ScanOptions>& options,
5758
const std::shared_ptr<FileFragment>& fragment) const override;
5859

60+
Result<RecordBatchGenerator> ScanBatchesAsync(
61+
const std::shared_ptr<ScanOptions>& options,
62+
const std::shared_ptr<FileFragment>& file) const override;
63+
5964
Future<util::optional<int64_t>> CountRows(
6065
const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
6166
std::shared_ptr<ScanOptions> options) override;
@@ -75,6 +80,9 @@ class ARROW_DS_EXPORT IpcFragmentScanOptions : public FragmentScanOptions {
7580
/// Options passed to the IPC file reader.
7681
/// included_fields, memory_pool, and use_threads are ignored.
7782
std::shared_ptr<ipc::IpcReadOptions> options;
83+
/// If present, the async scanner will enable I/O coalescing.
84+
/// This is ignored by the sync scanner.
85+
std::shared_ptr<io::CacheOptions> cache_options;
7886
};
7987

8088
class ARROW_DS_EXPORT IpcFileWriteOptions : public FileWriteOptions {

cpp/src/arrow/io/memory.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -344,8 +344,8 @@ Result<std::shared_ptr<Buffer>> BufferReader::DoReadAt(int64_t position, int64_t
344344
DCHECK_GE(nbytes, 0);
345345

346346
// Arrange for data to be paged in
347-
RETURN_NOT_OK(::arrow::internal::MemoryAdviseWillNeed(
348-
{{const_cast<uint8_t*>(data_ + position), static_cast<size_t>(nbytes)}}));
347+
// RETURN_NOT_OK(::arrow::internal::MemoryAdviseWillNeed(
348+
// {{const_cast<uint8_t*>(data_ + position), static_cast<size_t>(nbytes)}}));
349349

350350
if (nbytes > 0 && buffer_ != nullptr) {
351351
return SliceBuffer(buffer_, position, nbytes);

cpp/src/arrow/io/type_fwd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ struct FileMode {
2727
};
2828

2929
struct IOContext;
30+
struct CacheOptions;
3031

3132
/// EXPERIMENTAL: convenience global singleton for default IOContext settings
3233
ARROW_EXPORT

cpp/src/arrow/ipc/message.cc

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "arrow/ipc/util.h"
3434
#include "arrow/status.h"
3535
#include "arrow/util/endian.h"
36+
#include "arrow/util/future.h"
3637
#include "arrow/util/logging.h"
3738
#include "arrow/util/ubsan.h"
3839

@@ -324,6 +325,60 @@ Result<std::unique_ptr<Message>> ReadMessage(int64_t offset, int32_t metadata_le
324325
}
325326
}
326327

328+
Future<std::shared_ptr<Message>> ReadMessageAsync(int64_t offset, int32_t metadata_length,
329+
int64_t body_length,
330+
io::RandomAccessFile* file,
331+
const io::IOContext& context) {
332+
struct State {
333+
std::unique_ptr<Message> result;
334+
std::shared_ptr<MessageDecoderListener> listener;
335+
std::shared_ptr<MessageDecoder> decoder;
336+
};
337+
auto state = std::make_shared<State>();
338+
state->listener = std::make_shared<AssignMessageDecoderListener>(&state->result);
339+
state->decoder = std::make_shared<MessageDecoder>(state->listener);
340+
341+
if (metadata_length < state->decoder->next_required_size()) {
342+
return Status::Invalid("metadata_length should be at least ",
343+
state->decoder->next_required_size());
344+
}
345+
return file->ReadAsync(context, offset, metadata_length + body_length)
346+
.Then([=](std::shared_ptr<Buffer> metadata) -> Result<std::shared_ptr<Message>> {
347+
if (metadata->size() < metadata_length) {
348+
return Status::Invalid("Expected to read ", metadata_length,
349+
" metadata bytes but got ", metadata->size());
350+
}
351+
ARROW_RETURN_NOT_OK(
352+
state->decoder->Consume(SliceBuffer(metadata, 0, metadata_length)));
353+
switch (state->decoder->state()) {
354+
case MessageDecoder::State::INITIAL:
355+
return std::move(state->result);
356+
case MessageDecoder::State::METADATA_LENGTH:
357+
return Status::Invalid("metadata length is missing. File offset: ", offset,
358+
", metadata length: ", metadata_length);
359+
case MessageDecoder::State::METADATA:
360+
return Status::Invalid("flatbuffer size ",
361+
state->decoder->next_required_size(),
362+
" invalid. File offset: ", offset,
363+
", metadata length: ", metadata_length);
364+
case MessageDecoder::State::BODY: {
365+
auto body = SliceBuffer(metadata, metadata_length, body_length);
366+
if (body->size() < state->decoder->next_required_size()) {
367+
return Status::IOError("Expected to be able to read ",
368+
state->decoder->next_required_size(),
369+
" bytes for message body, got ", body->size());
370+
}
371+
RETURN_NOT_OK(state->decoder->Consume(body));
372+
return std::move(state->result);
373+
}
374+
case MessageDecoder::State::EOS:
375+
return Status::Invalid("Unexpected empty message in IPC file format");
376+
default:
377+
return Status::Invalid("Unexpected state: ", state->decoder->state());
378+
}
379+
});
380+
}
381+
327382
Status AlignStream(io::InputStream* stream, int32_t alignment) {
328383
ARROW_ASSIGN_OR_RAISE(int64_t position, stream->Tell());
329384
return stream->Advance(PaddedLength(position, alignment) - position);

cpp/src/arrow/ipc/message.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,11 @@ Result<std::unique_ptr<Message>> ReadMessage(const int64_t offset,
459459
const int32_t metadata_length,
460460
io::RandomAccessFile* file);
461461

462+
ARROW_EXPORT
463+
Future<std::shared_ptr<Message>> ReadMessageAsync(
464+
const int64_t offset, const int32_t metadata_length, const int64_t body_length,
465+
io::RandomAccessFile* file, const io::IOContext& context = io::default_io_context());
466+
462467
/// \brief Advance stream to an 8-byte offset if its position is not a multiple
463468
/// of 8 already
464469
/// \param[in] stream an input stream

0 commit comments

Comments
 (0)