@@ -59,6 +59,21 @@ static inline Result<std::shared_ptr<ipc::RecordBatchFileReader>> OpenReader(
5959 return reader;
6060}
6161
62+ static inline Future<std::shared_ptr<ipc::RecordBatchFileReader>> OpenReaderAsync (
63+ const FileSource& source,
64+ const ipc::IpcReadOptions& options = default_read_options()) {
65+ ARROW_ASSIGN_OR_RAISE (auto input, source.Open ());
66+ auto path = source.path ();
67+ return ipc::RecordBatchFileReader::OpenAsync (std::move (input), options)
68+ .Then ([](const std::shared_ptr<ipc::RecordBatchFileReader>& reader)
69+ -> Result<std::shared_ptr<ipc::RecordBatchFileReader>> { return reader; },
70+ [path](const Status& status)
71+ -> Result<std::shared_ptr<ipc::RecordBatchFileReader>> {
72+ return status.WithMessage (" Could not open IPC input source '" , path,
73+ " ': " , status.message ());
74+ });
75+ }
76+
6277static inline Result<std::vector<int >> GetIncludedFields (
6378 const Schema& schema, const std::vector<std::string>& materialized_fields) {
6479 std::vector<int > included_fields;
@@ -73,6 +88,26 @@ static inline Result<std::vector<int>> GetIncludedFields(
7388 return included_fields;
7489}
7590
91+ static inline Result<ipc::IpcReadOptions> GetReadOptions (
92+ const Schema& schema, const FileFormat& format, const ScanOptions& scan_options) {
93+ ARROW_ASSIGN_OR_RAISE (
94+ auto ipc_scan_options,
95+ GetFragmentScanOptions<IpcFragmentScanOptions>(
96+ kIpcTypeName , &scan_options, format.default_fragment_scan_options ));
97+ auto options =
98+ ipc_scan_options->options ? *ipc_scan_options->options : default_read_options ();
99+ options.memory_pool = scan_options.pool ;
100+ if (!options.included_fields .empty ()) {
101+ // Cannot set them here
102+ ARROW_LOG (WARNING ) << " IpcFragmentScanOptions.options->included_fields was set "
103+ " but will be ignored; included_fields are derived from "
104+ " fields referenced by the scan" ;
105+ }
106+ ARROW_ASSIGN_OR_RAISE (options.included_fields ,
107+ GetIncludedFields (schema, scan_options.MaterializedFields ()));
108+ return options;
109+ }
110+
76111// / \brief A ScanTask backed by an Ipc file.
77112class IpcScanTask : public ScanTask {
78113 public:
@@ -83,28 +118,11 @@ class IpcScanTask : public ScanTask {
83118 Result<RecordBatchIterator> Execute () override {
84119 struct Impl {
85120 static Result<RecordBatchIterator> Make (const FileSource& source,
86- FileFormat* format,
87- const ScanOptions* scan_options) {
121+ const FileFormat& format,
122+ const ScanOptions& scan_options) {
88123 ARROW_ASSIGN_OR_RAISE (auto reader, OpenReader (source));
89-
90- ARROW_ASSIGN_OR_RAISE (
91- auto ipc_scan_options,
92- GetFragmentScanOptions<IpcFragmentScanOptions>(
93- kIpcTypeName , scan_options, format->default_fragment_scan_options ));
94- auto options = ipc_scan_options->options ? *ipc_scan_options->options
95- : default_read_options ();
96- options.memory_pool = scan_options->pool ;
97- options.use_threads = false ;
98- if (!options.included_fields .empty ()) {
99- // Cannot set them here
100- ARROW_LOG (WARNING ) << " IpcFragmentScanOptions.options->included_fields was set "
101- " but will be ignored; included_fields are derived from "
102- " fields referenced by the scan" ;
103- }
104- ARROW_ASSIGN_OR_RAISE (
105- options.included_fields ,
106- GetIncludedFields (*reader->schema (), scan_options->MaterializedFields ()));
107-
124+ ARROW_ASSIGN_OR_RAISE (auto options,
125+ GetReadOptions (*reader->schema (), format, scan_options));
108126 ARROW_ASSIGN_OR_RAISE (reader, OpenReader (source, options));
109127 return RecordBatchIterator (Impl{std::move (reader), 0 });
110128 }
@@ -121,9 +139,9 @@ class IpcScanTask : public ScanTask {
121139 int i_;
122140 };
123141
124- return Impl::Make (
125- source_, internal::checked_pointer_cast<FileFragment>(fragment_)->format (). get (),
126- options_. get () );
142+ return Impl::Make (source_,
143+ * internal::checked_pointer_cast<FileFragment>(fragment_)->format (),
144+ * options_);
127145 }
128146
129147 private:
@@ -173,6 +191,44 @@ Result<ScanTaskIterator> IpcFileFormat::ScanFile(
173191 return IpcScanTaskIterator::Make (options, fragment);
174192}
175193
194+ Result<RecordBatchGenerator> IpcFileFormat::ScanBatchesAsync (
195+ const std::shared_ptr<ScanOptions>& options,
196+ const std::shared_ptr<FileFragment>& file) const {
197+ auto self = shared_from_this ();
198+ auto source = file->source ();
199+ auto open_reader = OpenReaderAsync (source);
200+ auto reopen_reader = [self, options,
201+ source](std::shared_ptr<ipc::RecordBatchFileReader> reader)
202+ -> Future<std::shared_ptr<ipc::RecordBatchFileReader>> {
203+ ARROW_ASSIGN_OR_RAISE (auto options,
204+ GetReadOptions (*reader->schema (), *self, *options));
205+ return OpenReader (source, options);
206+ };
207+ auto readahead_level = options->batch_readahead ;
208+ auto default_fragment_scan_options = this ->default_fragment_scan_options ;
209+ auto open_generator = [=](const std::shared_ptr<ipc::RecordBatchFileReader>& reader)
210+ -> Result<RecordBatchGenerator> {
211+ ARROW_ASSIGN_OR_RAISE (
212+ auto ipc_scan_options,
213+ GetFragmentScanOptions<IpcFragmentScanOptions>(kIpcTypeName , options.get (),
214+ default_fragment_scan_options));
215+
216+ RecordBatchGenerator generator;
217+ if (ipc_scan_options->cache_options ) {
218+ // Transferring helps performance when coalescing
219+ ARROW_ASSIGN_OR_RAISE (
220+ generator, reader->GetRecordBatchGenerator (
221+ /* coalesce=*/ true , options->io_context ,
222+ *ipc_scan_options->cache_options , internal::GetCpuThreadPool ()));
223+ } else {
224+ ARROW_ASSIGN_OR_RAISE (generator, reader->GetRecordBatchGenerator (
225+ /* coalesce=*/ false , options->io_context ));
226+ }
227+ return MakeReadaheadGenerator (std::move (generator), readahead_level);
228+ };
229+ return MakeFromFuture (open_reader.Then (reopen_reader).Then (open_generator));
230+ }
231+
176232Future<util::optional<int64_t >> IpcFileFormat::CountRows (
177233 const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
178234 std::shared_ptr<ScanOptions> options) {
0 commit comments