-
Notifications
You must be signed in to change notification settings - Fork 41
Expand file tree
/
Copy pathgenerators.py
More file actions
126 lines (102 loc) · 3.72 KB
/
generators.py
File metadata and controls
126 lines (102 loc) · 3.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""
This module provides generator utilities for batching items from
iterables and async iterables.
Functions:
abatcher(generator, batch_size=None, interval=None):
Asyncio generator wrapper that returns items with a given batch
size or interval (whichever is reached first).
batcher(iterable, batch_size=10):
Generator wrapper that returns items with a given batch size.
"""
import asyncio
import time
import python_utils
from python_utils import types
_T = types.TypeVar('_T')
async def abatcher(
generator: types.Union[
types.AsyncGenerator[_T, None],
types.AsyncIterator[_T],
],
batch_size: types.Optional[int] = None,
interval: types.Optional[types.delta_type] = None,
) -> types.AsyncGenerator[types.List[_T], None]:
"""
Asyncio generator wrapper that returns items with a given batch size or
interval (whichever is reached first).
Args:
generator: The async generator or iterator to batch.
batch_size (types.Optional[int], optional): The number of items per
batch. Defaults to None.
interval (types.Optional[types.delta_type], optional): The time
interval to wait before yielding a batch. Defaults to None.
Yields:
types.AsyncGenerator[types.List[_T], None]: A generator that yields
batches of items.
"""
batch: types.List[_T] = []
assert batch_size or interval, 'Must specify either batch_size or interval'
# If interval is specified, use it to determine when to yield the batch
# Alternatively set a really long timeout to keep the code simpler
if interval:
interval_s = python_utils.delta_to_seconds(interval)
else:
# Set the timeout to 10 years
interval_s = 60 * 60 * 24 * 365 * 10.0
next_yield: float = time.perf_counter() + interval_s
done: types.Set[asyncio.Task[_T]]
pending: types.Set[asyncio.Task[_T]] = set()
while True:
try:
done, pending = await asyncio.wait(
pending
or [
asyncio.create_task(
types.cast(
types.Coroutine[None, None, _T],
generator.__anext__(),
)
),
],
timeout=interval_s,
return_when=asyncio.FIRST_COMPLETED,
)
if done:
for result in done:
batch.append(result.result())
except StopAsyncIteration:
if batch:
yield batch
break
if batch_size is not None and len(batch) == batch_size:
yield batch
batch = []
if interval and batch and time.perf_counter() > next_yield:
yield batch
batch = []
# Always set the next yield time to the current time. If the
# loop is running slow due to blocking functions we do not
# want to burst too much
next_yield = time.perf_counter() + interval_s
def batcher(
iterable: types.Iterable[_T],
batch_size: int = 10,
) -> types.Generator[types.List[_T], None, None]:
"""
Generator wrapper that returns items with a given batch size.
Args:
iterable (types.Iterable[_T]): The iterable to batch.
batch_size (int, optional): The number of items per batch. Defaults
to 10.
Yields:
types.Generator[types.List[_T], None, None]: A generator that yields
batches of items.
"""
batch: types.List[_T] = []
for item in iterable:
batch.append(item)
if len(batch) == batch_size:
yield batch
batch = []
if batch:
yield batch