Skip to content

Commit 56c7d69

Browse files
authored
Increase timeout in ServiceInfo.request to handle loaded systems (#895)
It can take a few seconds for a loaded system to run the `async_request` coroutine when the event loop is busy or the system is CPU bound (example being Home Assistant startup). We now add an additional `_LOADED_SYSTEM_TIMEOUT` (10s) to the `run_coroutine_threadsafe` calls to ensure the coroutine has the total amount of time to run up to its internal timeout (default of 3000ms). Ten seconds is a bit large of a timeout; however, its only unused in cases where we wrap other timeouts. We now expect the only instance the `run_coroutine_threadsafe` result timeout will happen in a production circumstance is when someone is running a `ServiceInfo.request()` in a thread and another thread calls `Zeroconf.close()` at just the right moment that the future is never completed unless the system is so loaded that it is nearly unresponsive. The timeout for `run_coroutine_threadsafe` is the maximum time a thread can cleanly shut down when zeroconf is closed out in another thread, which should always be longer than the underlying thread operation.
1 parent 90bc8ca commit 56c7d69

7 files changed

Lines changed: 56 additions & 5 deletions

File tree

tests/services/test_info.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,3 +739,15 @@ def send(out, addr=const._MDNS_ADDR, port=const._MDNS_PORT):
739739
zeroconf.get_service_info(f"name.{type_}", type_, 500, question_type=r.DNSQuestionType.QM)
740740
assert first_outgoing.questions[0].unicast == False
741741
zeroconf.close()
742+
743+
744+
def test_request_timeout():
745+
"""Test that the timeout does not throw an exception and finishes close to the actual timeout."""
746+
zeroconf = r.Zeroconf(interfaces=['127.0.0.1'])
747+
start_time = r.current_time_millis()
748+
assert zeroconf.get_service_info("_notfound.local.", "notthere._notfound.local.") is None
749+
end_time = r.current_time_millis()
750+
zeroconf.close()
751+
# 3000ms for the default timeout
752+
# 1000ms for loaded systems + schedule overhead
753+
assert (end_time - start_time) < 3000 + 1000

tests/test_asyncio.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -935,3 +935,17 @@ def update_service(self, zc, type_, name) -> None:
935935
('add', type_, registration_name),
936936
]
937937
await aiozc.async_close()
938+
939+
940+
@pytest.mark.asyncio
941+
async def test_async_request_timeout():
942+
"""Test that the timeout does not throw an exception and finishes close to the actual timeout."""
943+
aiozc = AsyncZeroconf(interfaces=['127.0.0.1'])
944+
await aiozc.zeroconf.async_wait_for_start()
945+
start_time = current_time_millis()
946+
assert await aiozc.async_get_service_info("_notfound.local.", "notthere._notfound.local.") is None
947+
end_time = current_time_millis()
948+
await aiozc.async_close()
949+
# 3000ms for the default timeout
950+
# 1000ms for loaded systems + schedule overhead
951+
assert (end_time - start_time) < 3000 + 1000

tests/utils/test_asyncio.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,17 @@
55
"""Unit tests for zeroconf._utils.asyncio."""
66

77
import asyncio
8+
import concurrent.futures
89
import contextlib
910
import threading
1011
import time
1112
from unittest.mock import patch
1213

1314
import pytest
1415

16+
from zeroconf._core import _CLOSE_TIMEOUT
1517
from zeroconf._utils import asyncio as aioutils
18+
from zeroconf.const import _LOADED_SYSTEM_TIMEOUT
1619

1720

1821
@pytest.mark.asyncio
@@ -82,7 +85,8 @@ async def _still_running():
8285

8386
def _run_coro() -> None:
8487
runcoro_thread_ready.set()
85-
asyncio.run_coroutine_threadsafe(_still_running(), loop).result(1)
88+
with contextlib.suppress(concurrent.futures.TimeoutError):
89+
asyncio.run_coroutine_threadsafe(_still_running(), loop).result(1)
8690

8791
runcoro_thread = threading.Thread(target=_run_coro, daemon=True)
8892
runcoro_thread.start()
@@ -97,3 +101,14 @@ def _run_coro() -> None:
97101

98102
assert loop.is_running() is False
99103
runcoro_thread.join()
104+
105+
106+
def test_cumulative_timeouts_less_than_close_plus_buffer():
107+
"""Test that the combined async timeouts are shorter than the close timeout with the buffer.
108+
109+
We want to make sure that the close timeout is the one that gets
110+
raised if something goes wrong.
111+
"""
112+
assert (
113+
aioutils._TASK_AWAIT_TIMEOUT + aioutils._GET_ALL_TASKS_TIMEOUT + aioutils._WAIT_FOR_LOOP_TASKS_TIMEOUT
114+
) < 1 + _CLOSE_TIMEOUT + _LOADED_SYSTEM_TIMEOUT

zeroconf/_core.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
_FLAGS_AA,
6363
_FLAGS_QR_QUERY,
6464
_FLAGS_QR_RESPONSE,
65+
_LOADED_SYSTEM_TIMEOUT,
6566
_MAX_MSG_ABSOLUTE,
6667
_MDNS_ADDR,
6768
_MDNS_ADDR6,
@@ -172,7 +173,9 @@ def close(self) -> None:
172173
return
173174
if not self.loop.is_running():
174175
return
175-
asyncio.run_coroutine_threadsafe(self._async_close(), self.loop).result(_CLOSE_TIMEOUT)
176+
asyncio.run_coroutine_threadsafe(self._async_close(), self.loop).result(
177+
_CLOSE_TIMEOUT + _LOADED_SYSTEM_TIMEOUT
178+
)
176179

177180

178181
class AsyncListener(asyncio.Protocol, QuietLogger):

zeroconf/_services/info.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
_DNS_OTHER_TTL,
4646
_FLAGS_QR_QUERY,
4747
_LISTENER_TIME,
48+
_LOADED_SYSTEM_TIMEOUT,
4849
_TYPE_A,
4950
_TYPE_AAAA,
5051
_TYPE_PTR,
@@ -427,7 +428,7 @@ def request(
427428
raise RuntimeError("Use AsyncServiceInfo.async_request from the event loop")
428429
return asyncio.run_coroutine_threadsafe(
429430
self.async_request(zc, timeout, question_type), zc.loop
430-
).result(millis_to_seconds(timeout) + 1)
431+
).result(millis_to_seconds(timeout) + _LOADED_SYSTEM_TIMEOUT)
431432

432433
async def async_request(
433434
self, zc: 'Zeroconf', timeout: float, question_type: Optional[DNSQuestionType] = None

zeroconf/_utils/asyncio.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@
2626
from typing import Any, List, Optional, Set, cast
2727

2828
_TASK_AWAIT_TIMEOUT = 1
29-
_GET_ALL_TASKS_TIMEOUT = 1
30-
_WAIT_FOR_LOOP_TASKS_TIMEOUT = 2 # Must be larger than _TASK_AWAIT_TIMEOUT
29+
_GET_ALL_TASKS_TIMEOUT = 3
30+
_WAIT_FOR_LOOP_TASKS_TIMEOUT = 3 # Must be larger than _TASK_AWAIT_TIMEOUT
3131

3232

3333
def get_best_available_queue() -> queue.Queue:

zeroconf/const.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@
3434
_DUPLICATE_QUESTION_INTERVAL = _BROWSER_TIME - 1 # ms
3535
_BROWSER_BACKOFF_LIMIT = 3600 # s
3636
_CACHE_CLEANUP_INTERVAL = 10000 # ms
37+
_LOADED_SYSTEM_TIMEOUT = 10 # s
38+
# If the system is loaded or the event
39+
# loop was blocked by another task that was doing I/O in the loop
40+
# (shouldn't happen but it does in practice) we need to give
41+
# a buffer timeout to ensure a coroutine can finish before
42+
# the future times out
3743

3844
# Some DNS constants
3945

0 commit comments

Comments
 (0)