Skip to content

Commit 1b4d73f

Browse files
committed
ARROW-11935: [C++] Add push generator
A push generator has a producer end which pushes values to a queue, and a consumer end (the generator itself) which yields futures that receive the values pushed by the producer.

Closes apache#9714 from pitrou/ARROW-11935-push-gen

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent ee24399 commit 1b4d73f

8 files changed

Lines changed: 360 additions & 75 deletions

File tree

cpp/src/arrow/csv/reader_test.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "arrow/io/memory.h"
3131
#include "arrow/status.h"
3232
#include "arrow/table.h"
33+
#include "arrow/testing/future_util.h"
3334
#include "arrow/testing/gtest_util.h"
3435
#include "arrow/util/future.h"
3536
#include "arrow/util/thread_pool.h"

cpp/src/arrow/filesystem/test_util.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "arrow/filesystem/util_internal.h"
2929
#include "arrow/io/interfaces.h"
3030
#include "arrow/status.h"
31+
#include "arrow/testing/future_util.h"
3132
#include "arrow/testing/gtest_util.h"
3233
#include "arrow/util/future.h"
3334

cpp/src/arrow/testing/future_util.h

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#pragma once
19+
20+
#include "arrow/testing/gtest_util.h"
21+
#include "arrow/util/future.h"
22+
23+
// This macro should be called by futures that are expected to
24+
// complete pretty quickly. 10 seconds is the default max wait
25+
// here. Anything longer than that and it's a questionable
26+
// unit test anyways.
27+
#define ASSERT_FINISHES_IMPL(fut) \
28+
do { \
29+
ASSERT_TRUE(fut.Wait(10)); \
30+
if (!fut.is_finished()) { \
31+
FAIL() << "Future did not finish in a timely fashion"; \
32+
} \
33+
} while (false)
34+
35+
#define ASSERT_FINISHES_OK(expr) \
36+
do { \
37+
auto&& _fut = (expr); \
38+
ASSERT_TRUE(_fut.Wait(10)); \
39+
if (!_fut.is_finished()) { \
40+
FAIL() << "Future did not finish in a timely fashion"; \
41+
} \
42+
auto _st = _fut.status(); \
43+
if (!_st.ok()) { \
44+
FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _st.ToString(); \
45+
} \
46+
} while (false)
47+
48+
#define ASSERT_FINISHES_AND_RAISES(ENUM, expr) \
49+
do { \
50+
auto&& fut = (expr); \
51+
ASSERT_FINISHES_IMPL(fut); \
52+
ASSERT_RAISES(ENUM, fut.status()); \
53+
} while (false)
54+
55+
#define ASSERT_FINISHES_OK_AND_ASSIGN_IMPL(lhs, rexpr, future_name) \
56+
auto future_name = (rexpr); \
57+
ASSERT_FINISHES_IMPL(future_name); \
58+
ASSERT_OK_AND_ASSIGN(lhs, future_name.result());
59+
60+
#define ASSERT_FINISHES_OK_AND_ASSIGN(lhs, rexpr) \
61+
ASSERT_FINISHES_OK_AND_ASSIGN_IMPL(lhs, rexpr, \
62+
ARROW_ASSIGN_OR_RAISE_NAME(_fut, __COUNTER__))
63+
64+
#define ASSERT_FINISHES_OK_AND_EQ(expected, expr) \
65+
do { \
66+
ASSERT_FINISHES_OK_AND_ASSIGN(auto _actual, (expr)); \
67+
ASSERT_EQ(expected, _actual); \
68+
} while (0)
69+
70+
namespace arrow {
71+
72+
template <typename T>
73+
void AssertNotFinished(const Future<T>& fut) {
74+
ASSERT_FALSE(IsFutureFinished(fut.state()));
75+
}
76+
77+
template <typename T>
78+
void AssertFinished(const Future<T>& fut) {
79+
ASSERT_TRUE(IsFutureFinished(fut.state()));
80+
}
81+
82+
// Assert the future is successful *now*
83+
template <typename T>
84+
void AssertSuccessful(const Future<T>& fut) {
85+
if (IsFutureFinished(fut.state())) {
86+
ASSERT_EQ(fut.state(), FutureState::SUCCESS);
87+
ASSERT_OK(fut.status());
88+
} else {
89+
FAIL() << "Expected future to be completed successfully but it was still pending";
90+
}
91+
}
92+
93+
// Assert the future is failed *now*
94+
template <typename T>
95+
void AssertFailed(const Future<T>& fut) {
96+
if (IsFutureFinished(fut.state())) {
97+
ASSERT_EQ(fut.state(), FutureState::FAILURE);
98+
ASSERT_FALSE(fut.status().ok());
99+
} else {
100+
FAIL() << "Expected future to have failed but it was still pending";
101+
}
102+
}
103+
104+
} // namespace arrow

cpp/src/arrow/testing/gtest_util.h

Lines changed: 2 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "arrow/type_traits.h"
4242
#include "arrow/util/bit_util.h"
4343
#include "arrow/util/macros.h"
44+
#include "arrow/util/type_fwd.h"
4445

4546
// NOTE: failing must be inline in the macros below, to get correct file / line number
4647
// reporting on test failures.
@@ -134,48 +135,8 @@
134135
ASSERT_EQ(expected, _actual); \
135136
} while (0)
136137

137-
// This macro should be called by futures that are expected to
138-
// complete pretty quickly. 2 seconds is the default max wait
139-
// here. Anything longer than that and it's a questionable
140-
// unit test anyways.
141-
#define ASSERT_FINISHES_IMPL(fut) \
142-
do { \
143-
ASSERT_TRUE(fut.Wait(10)); \
144-
if (!fut.is_finished()) { \
145-
FAIL() << "Future did not finish in a timely fashion"; \
146-
} \
147-
} while (false)
148-
149-
#define ASSERT_FINISHES_OK(expr) \
150-
do { \
151-
auto&& _fut = (expr); \
152-
ASSERT_TRUE(_fut.Wait(10)); \
153-
if (!_fut.is_finished()) { \
154-
FAIL() << "Future did not finish in a timely fashion"; \
155-
} \
156-
auto _st = _fut.status(); \
157-
if (!_st.ok()) { \
158-
FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _st.ToString(); \
159-
} \
160-
} while (false)
161-
162-
#define ASSERT_FINISHES_ERR(ENUM, expr) \
163-
do { \
164-
auto&& fut = (expr); \
165-
ASSERT_FINISHES_IMPL(fut); \
166-
ASSERT_RAISES(ENUM, fut.status()); \
167-
} while (false)
168-
169-
#define ASSERT_FINISHES_OK_AND_ASSIGN_IMPL(lhs, rexpr, future_name) \
170-
auto future_name = (rexpr); \
171-
ASSERT_FINISHES_IMPL(future_name); \
172-
ASSERT_OK_AND_ASSIGN(lhs, future_name.result());
173-
174-
#define ASSERT_FINISHES_OK_AND_ASSIGN(lhs, rexpr) \
175-
ASSERT_FINISHES_OK_AND_ASSIGN_IMPL(lhs, rexpr, \
176-
ARROW_ASSIGN_OR_RAISE_NAME(_fut, __COUNTER__))
177-
178138
namespace arrow {
139+
179140
// ----------------------------------------------------------------------
180141
// Useful testing::Types declarations
181142

cpp/src/arrow/util/async_generator.h

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,16 @@
1616
// under the License.
1717

1818
#pragma once
19+
20+
#include <cassert>
21+
#include <deque>
1922
#include <queue>
2023

2124
#include "arrow/util/functional.h"
2225
#include "arrow/util/future.h"
2326
#include "arrow/util/iterator.h"
2427
#include "arrow/util/logging.h"
28+
#include "arrow/util/mutex.h"
2529
#include "arrow/util/optional.h"
2630
#include "arrow/util/queue.h"
2731
#include "arrow/util/thread_pool.h"
@@ -36,6 +40,11 @@ Future<T> AsyncGeneratorEnd() {
3640
return Future<T>::MakeFinished(IterationTraits<T>::End());
3741
}
3842

43+
template <typename T>
44+
bool IsGeneratorEnd(const T& value) {
45+
return value == IterationTraits<T>::End();
46+
}
47+
3948
/// Iterates through a generator of futures, visiting the result of each one and
4049
/// returning a future that completes when all have been visited
4150
template <typename T>
@@ -336,6 +345,103 @@ class ReadaheadGenerator {
336345
std::queue<Future<T>> readahead_queue_;
337346
};
338347

348+
/// \brief A generator where the producer pushes items on a queue.
349+
///
350+
/// No back-pressure is applied, so this generator is mostly useful when
351+
/// producing the values is neither CPU- nor memory-expensive (e.g. fetching
352+
/// filesystem metadata).
353+
///
354+
/// This generator is not async-reentrant.
355+
template <typename T>
356+
class PushGenerator {
357+
struct State {
358+
util::Mutex mutex;
359+
std::deque<Result<T>> result_q;
360+
util::optional<Future<T>> consumer_fut;
361+
bool finished = false;
362+
};
363+
364+
public:
365+
/// Producer API for PushGenerator
366+
class Producer {
367+
public:
368+
explicit Producer(std::shared_ptr<State> state) : state_(std::move(state)) {}
369+
370+
/// Push a value on the queue
371+
void Push(Result<T> result) {
372+
auto lock = state_->mutex.Lock();
373+
if (state_->finished) {
374+
// Closed early
375+
return;
376+
}
377+
if (state_->consumer_fut.has_value()) {
378+
auto fut = std::move(state_->consumer_fut.value());
379+
state_->consumer_fut.reset();
380+
lock.Unlock(); // unlock before potentially invoking a callback
381+
fut.MarkFinished(std::move(result));
382+
return;
383+
}
384+
state_->result_q.push_back(std::move(result));
385+
}
386+
387+
/// \brief Tell the consumer we have finished producing
388+
///
389+
/// It is allowed to call this and later call Push() again ("early close").
390+
/// In this case, calls to Push() after the queue is closed are silently
391+
/// ignored. This can help implementing non-trivial cancellation cases.
392+
void Close() {
393+
auto lock = state_->mutex.Lock();
394+
if (state_->finished) {
395+
// Already closed
396+
return;
397+
}
398+
state_->finished = true;
399+
if (state_->consumer_fut.has_value()) {
400+
auto fut = std::move(state_->consumer_fut.value());
401+
state_->consumer_fut.reset();
402+
lock.Unlock(); // unlock before potentially invoking a callback
403+
fut.MarkFinished(IterationTraits<T>::End());
404+
}
405+
}
406+
407+
bool is_closed() const {
408+
auto lock = state_->mutex.Lock();
409+
return state_->finished;
410+
}
411+
412+
private:
413+
const std::shared_ptr<State> state_;
414+
};
415+
416+
PushGenerator() : state_(std::make_shared<State>()) {}
417+
418+
/// Read an item from the queue
419+
Future<T> operator()() {
420+
auto lock = state_->mutex.Lock();
421+
assert(!state_->consumer_fut.has_value()); // Non-reentrant
422+
if (!state_->result_q.empty()) {
423+
auto fut = Future<T>::MakeFinished(std::move(state_->result_q.front()));
424+
state_->result_q.pop_front();
425+
return fut;
426+
}
427+
if (state_->finished) {
428+
return AsyncGeneratorEnd<T>();
429+
}
430+
auto fut = Future<T>::Make();
431+
state_->consumer_fut = fut;
432+
return fut;
433+
}
434+
435+
/// \brief Return producer-side interface
436+
///
437+
/// The returned object must be used by the producer to push values on the queue.
438+
/// Only a single Producer object should be instantiated.
439+
Producer producer() { return Producer{state_}; }
440+
441+
private:
442+
const std::shared_ptr<State> state_;
443+
};
444+
339445
/// \brief Creates a generator that pulls reentrantly from a source
340446
/// This generator will pull reentrantly from a source, ensuring that max_readahead
341447
/// requests are active at any given time.

cpp/src/arrow/util/future_test.cc

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <gmock/gmock-matchers.h>
3333
#include <gtest/gtest.h>
3434

35+
#include "arrow/testing/future_util.h"
3536
#include "arrow/testing/gtest_util.h"
3637
#include "arrow/util/logging.h"
3738
#include "arrow/util/thread_pool.h"
@@ -70,38 +71,6 @@ struct IterationTraits<MoveOnlyDataType> {
7071
static MoveOnlyDataType End() { return MoveOnlyDataType(-1); }
7172
};
7273

73-
template <typename T>
74-
void AssertNotFinished(const Future<T>& fut) {
75-
ASSERT_FALSE(IsFutureFinished(fut.state()));
76-
}
77-
78-
template <typename T>
79-
void AssertFinished(const Future<T>& fut) {
80-
ASSERT_TRUE(IsFutureFinished(fut.state()));
81-
}
82-
83-
// Assert the future is successful *now*
84-
template <typename T>
85-
void AssertSuccessful(const Future<T>& fut) {
86-
if (IsFutureFinished(fut.state())) {
87-
ASSERT_EQ(fut.state(), FutureState::SUCCESS);
88-
ASSERT_OK(fut.status());
89-
} else {
90-
FAIL() << "Expected future to be completed successfully but it was still pending";
91-
}
92-
}
93-
94-
// Assert the future is failed *now*
95-
template <typename T>
96-
void AssertFailed(const Future<T>& fut) {
97-
if (IsFutureFinished(fut.state())) {
98-
ASSERT_EQ(fut.state(), FutureState::FAILURE);
99-
ASSERT_FALSE(fut.status().ok());
100-
} else {
101-
FAIL() << "Expected future to have failed but it was still pending";
102-
}
103-
}
104-
10574
template <typename T>
10675
struct IteratorResults {
10776
std::vector<T> values;

0 commit comments

Comments
 (0)