Skip to content

Commit 544db1c

Browse files
fix: async client uses fixed grace period (#1236)
Previously, when a channel refresh occurs, the async client would use `channel.close()` with a grace parameter to allow previous channels to keep serving old requests for a time. We were seeing flakes in our tests, showing that `channel.close()` isn't reliable, and can sometimes cancel ongoing requests before the grace period ends This PR fixes this by using a fixed sleep time before calling close in the async client, like the sync client already does. This should remove the potential for cancelled requests before the grace period ends, and improve test flakiness I also updated the system test to fully capture this problematic state, instead of encountering it in a random race condition
1 parent 2a5baf1 commit 544db1c

File tree

4 files changed

+67
-69
lines changed

4 files changed

+67
-69
lines changed

google/cloud/bigtable/data/_async/client.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -481,12 +481,11 @@ async def _manage_channel(
481481
old_channel = super_channel.swap_channel(new_channel)
482482
self._invalidate_channel_stubs()
483483
# give old_channel a chance to complete existing rpcs
484-
if CrossSync.is_async:
485-
await old_channel.close(grace_period)
486-
else:
487-
if grace_period:
488-
self._is_closed.wait(grace_period) # type: ignore
489-
old_channel.close() # type: ignore
484+
if grace_period:
485+
await CrossSync.event_wait(
486+
self._is_closed, grace_period, async_break_early=False
487+
)
488+
await old_channel.close()
490489
# subtract the time spent waiting for the channel to be replaced
491490
next_refresh = random.uniform(refresh_interval_min, refresh_interval_max)
492491
next_sleep = max(next_refresh - (time.monotonic() - start_timestamp), 0)

google/cloud/bigtable/data/_sync_autogen/client.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,9 @@ def _manage_channel(
365365
old_channel = super_channel.swap_channel(new_channel)
366366
self._invalidate_channel_stubs()
367367
if grace_period:
368-
self._is_closed.wait(grace_period)
368+
CrossSync._Sync_Impl.event_wait(
369+
self._is_closed, grace_period, async_break_early=False
370+
)
369371
old_channel.close()
370372
next_refresh = random.uniform(refresh_interval_min, refresh_interval_max)
371373
next_sleep = max(next_refresh - (time.monotonic() - start_timestamp), 0)

tests/system/data/test_system_async.py

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -266,49 +266,49 @@ async def test_ping_and_warm(self, client, target):
266266
@CrossSync.pytest
267267
async def test_channel_refresh(self, table_id, instance_id, temp_rows):
268268
"""
269-
change grpc channel to refresh after 1 second. Schedule a read_rows call after refresh,
270-
to ensure new channel works
269+
perform requests while swapping out the grpc channel. Requests should continue without error
271270
"""
272-
await temp_rows.add_row(b"row_key_1")
273-
await temp_rows.add_row(b"row_key_2")
274-
client = self._make_client()
275-
# start custom refresh task
276-
try:
271+
import time
272+
273+
await temp_rows.add_row(b"test_row")
274+
async with self._make_client() as client:
275+
client._channel_refresh_task.cancel()
276+
channel_wrapper = client.transport.grpc_channel
277+
first_channel = channel_wrapper._channel
278+
# swap channels frequently, with large grace windows
277279
client._channel_refresh_task = CrossSync.create_task(
278280
client._manage_channel,
279-
refresh_interval_min=1,
280-
refresh_interval_max=1,
281+
refresh_interval_min=0.1,
282+
refresh_interval_max=0.1,
283+
grace_period=1,
281284
sync_executor=client._executor,
282285
)
283-
# let task run
284-
await CrossSync.yield_to_event_loop()
286+
287+
# hit channels with frequent requests
288+
end_time = time.monotonic() + 3
285289
async with client.get_table(instance_id, table_id) as table:
286-
rows = await table.read_rows({})
287-
channel_wrapper = client.transport.grpc_channel
288-
first_channel = channel_wrapper._channel
289-
assert len(rows) == 2
290-
await CrossSync.sleep(2)
291-
rows_after_refresh = await table.read_rows({})
292-
assert len(rows_after_refresh) == 2
293-
assert client.transport.grpc_channel is channel_wrapper
294-
updated_channel = channel_wrapper._channel
295-
assert updated_channel is not first_channel
296-
# ensure interceptors are kept (gapic's logging interceptor, and metric interceptor)
297-
if CrossSync.is_async:
298-
unary_interceptors = updated_channel._unary_unary_interceptors
299-
assert len(unary_interceptors) == 2
300-
assert GapicInterceptor in [type(i) for i in unary_interceptors]
301-
assert client._metrics_interceptor in unary_interceptors
302-
stream_interceptors = updated_channel._unary_stream_interceptors
303-
assert len(stream_interceptors) == 1
304-
assert client._metrics_interceptor in stream_interceptors
305-
else:
306-
assert isinstance(
307-
client.transport._logged_channel._interceptor, GapicInterceptor
308-
)
309-
assert updated_channel._interceptor == client._metrics_interceptor
310-
finally:
311-
await client.close()
290+
while time.monotonic() < end_time:
291+
# we expect a CancelledError if a channel is closed before completion
292+
rows = await table.read_rows({})
293+
assert len(rows) == 1
294+
await CrossSync.yield_to_event_loop()
295+
# ensure channel was updated
296+
updated_channel = channel_wrapper._channel
297+
assert updated_channel is not first_channel
298+
# ensure interceptors are kept (gapic's logging interceptor, and metric interceptor)
299+
if CrossSync.is_async:
300+
unary_interceptors = updated_channel._unary_unary_interceptors
301+
assert len(unary_interceptors) == 2
302+
assert GapicInterceptor in [type(i) for i in unary_interceptors]
303+
assert client._metrics_interceptor in unary_interceptors
304+
stream_interceptors = updated_channel._unary_stream_interceptors
305+
assert len(stream_interceptors) == 1
306+
assert client._metrics_interceptor in stream_interceptors
307+
else:
308+
assert isinstance(
309+
client.transport._logged_channel._interceptor, GapicInterceptor
310+
)
311+
assert updated_channel._interceptor == client._metrics_interceptor
312312

313313
@CrossSync.pytest
314314
@pytest.mark.usefixtures("target")

tests/system/data/test_system_autogen.py

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -221,36 +221,33 @@ def test_ping_and_warm(self, client, target):
221221
reason="emulator mode doesn't refresh channel",
222222
)
223223
def test_channel_refresh(self, table_id, instance_id, temp_rows):
224-
"""change grpc channel to refresh after 1 second. Schedule a read_rows call after refresh,
225-
to ensure new channel works"""
226-
temp_rows.add_row(b"row_key_1")
227-
temp_rows.add_row(b"row_key_2")
228-
client = self._make_client()
229-
try:
224+
"""perform requests while swapping out the grpc channel. Requests should continue without error"""
225+
import time
226+
227+
temp_rows.add_row(b"test_row")
228+
with self._make_client() as client:
229+
client._channel_refresh_task.cancel()
230+
channel_wrapper = client.transport.grpc_channel
231+
first_channel = channel_wrapper._channel
230232
client._channel_refresh_task = CrossSync._Sync_Impl.create_task(
231233
client._manage_channel,
232-
refresh_interval_min=1,
233-
refresh_interval_max=1,
234+
refresh_interval_min=0.1,
235+
refresh_interval_max=0.1,
236+
grace_period=1,
234237
sync_executor=client._executor,
235238
)
236-
CrossSync._Sync_Impl.yield_to_event_loop()
239+
end_time = time.monotonic() + 3
237240
with client.get_table(instance_id, table_id) as table:
238-
rows = table.read_rows({})
239-
channel_wrapper = client.transport.grpc_channel
240-
first_channel = channel_wrapper._channel
241-
assert len(rows) == 2
242-
CrossSync._Sync_Impl.sleep(2)
243-
rows_after_refresh = table.read_rows({})
244-
assert len(rows_after_refresh) == 2
245-
assert client.transport.grpc_channel is channel_wrapper
246-
updated_channel = channel_wrapper._channel
247-
assert updated_channel is not first_channel
248-
assert isinstance(
249-
client.transport._logged_channel._interceptor, GapicInterceptor
250-
)
251-
assert updated_channel._interceptor == client._metrics_interceptor
252-
finally:
253-
client.close()
241+
while time.monotonic() < end_time:
242+
rows = table.read_rows({})
243+
assert len(rows) == 1
244+
CrossSync._Sync_Impl.yield_to_event_loop()
245+
updated_channel = channel_wrapper._channel
246+
assert updated_channel is not first_channel
247+
assert isinstance(
248+
client.transport._logged_channel._interceptor, GapicInterceptor
249+
)
250+
assert updated_channel._interceptor == client._metrics_interceptor
254251

255252
@pytest.mark.usefixtures("target")
256253
@CrossSync._Sync_Impl.Retry(

0 commit comments

Comments
 (0)