Skip to content

Commit e0a0858

Browse files
committed
core: don't process dbus unit and job queue when there are already too many messages pending
We maintain a queue of units and jobs that we are supposed to generate change/new notifications for because they were either just created or some of their property has changed. Let's throttle processing of this queue a bit: as soon as > 1K of bus messages are queued for writing let's skip processing the queue, and then recheck on the next iteration again. Moreover, never process more than 100 units in one go, return to the event loop after that. Both limits together should put effective limits on both space and time usage of the function, delaying further operations until a later moment, when the queue is empty or the the event loop is sufficiently idle again. This should keep the number of generated messages much lower than before on busy systems or where some client is hanging. Note that this also means a bad client can slow down message dispatching substantially for up to 90s if it likes to, for all clients. But that should be acceptable as we only allow trusted bus clients, anyway. Fixes: systemd#8166
1 parent 9fc677e commit e0a0858

File tree

4 files changed

+76
-9
lines changed

4 files changed

+76
-9
lines changed

src/core/dbus.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1279,3 +1279,34 @@ int bus_verify_reload_daemon_async(Manager *m, sd_bus_message *call, sd_bus_erro
12791279
int bus_verify_set_environment_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
12801280
return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.set-environment", NULL, false, UID_INVALID, &m->polkit_registry, error);
12811281
}
1282+
1283+
uint64_t manager_bus_n_queued_write(Manager *m) {
1284+
uint64_t c = 0;
1285+
Iterator i;
1286+
sd_bus *b;
1287+
int r;
1288+
1289+
/* Returns the total number of messages queued for writing on all our direct and API busses. */
1290+
1291+
SET_FOREACH(b, m->private_buses, i) {
1292+
uint64_t k;
1293+
1294+
r = sd_bus_get_n_queued_write(b, &k);
1295+
if (r < 0)
1296+
log_debug_errno(r, "Failed to query queued messages for private bus: %m");
1297+
else
1298+
c += k;
1299+
}
1300+
1301+
if (m->api_bus) {
1302+
uint64_t k;
1303+
1304+
r = sd_bus_get_n_queued_write(m->api_bus, &k);
1305+
if (r < 0)
1306+
log_debug_errno(r, "Failed to query queued messages for API bus: %m");
1307+
else
1308+
c += k;
1309+
}
1310+
1311+
return c;
1312+
}

src/core/dbus.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,5 @@ int bus_verify_reload_daemon_async(Manager *m, sd_bus_message *call, sd_bus_erro
4848
int bus_verify_set_environment_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
4949

5050
int bus_forward_agent_released(Manager *m, const char *path);
51+
52+
uint64_t manager_bus_n_queued_write(Manager *m);

src/core/manager.c

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,13 @@
102102
#define JOBS_IN_PROGRESS_PERIOD_USEC (USEC_PER_SEC / 3)
103103
#define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3
104104

105+
/* If there are more than 1K bus messages queue across our API and direct busses, then let's not add more on top until
106+
* the queue gets more empty. */
107+
#define MANAGER_BUS_BUSY_THRESHOLD 1024LU
108+
109+
/* How many units and jobs to process of the bus queue before returning to the event loop. */
110+
#define MANAGER_BUS_MESSAGE_BUDGET 100U
111+
105112
static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
106113
static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
107114
static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
@@ -1886,41 +1893,65 @@ static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
18861893
}
18871894

18881895
static unsigned manager_dispatch_dbus_queue(Manager *m) {
1889-
Job *j;
1896+
unsigned n = 0, budget;
18901897
Unit *u;
1891-
unsigned n = 0;
1898+
Job *j;
18921899

18931900
assert(m);
18941901

18951902
if (m->dispatching_dbus_queue)
18961903
return 0;
18971904

1905+
/* Anything to do at all? */
1906+
if (!m->dbus_unit_queue && !m->dbus_job_queue && !m->send_reloading_done && !m->queued_message)
1907+
return 0;
1908+
1909+
/* Do we have overly many messages queued at the moment? If so, let's not enqueue more on top, let's sit this
1910+
* cycle out, and process things in a later cycle when the queues got a bit emptier. */
1911+
if (manager_bus_n_queued_write(m) > MANAGER_BUS_BUSY_THRESHOLD)
1912+
return 0;
1913+
1914+
/* Only process a certain number of units/jobs per event loop iteration. Even if the bus queue wasn't overly
1915+
* full before this call we shouldn't increase it in size too wildly in one step, and we shouldn't monopolize
1916+
* CPU time with generating these messages. Note the difference in counting of this "budget" and the
1917+
* "threshold" above: the "budget" is decreased only once per generated message, regardless how many
1918+
* busses/direct connections it is enqueued on, while the "threshold" is applied to each queued instance of bus
1919+
* message, i.e. if the same message is enqueued to five busses/direct connections it will be counted five
1920+
* times. This difference in counting ("references" vs. "instances") is primarily a result of the fact that
1921+
* it's easier to implement it this way, however it also reflects the thinking that the "threshold" should put
1922+
* a limit on used queue memory, i.e. space, while the "budget" should put a limit on time. Also note that
1923+
* the "threshold" is currently chosen much higher than the "budget". */
1924+
budget = MANAGER_BUS_MESSAGE_BUDGET;
1925+
18981926
m->dispatching_dbus_queue = true;
18991927

1900-
while ((u = m->dbus_unit_queue)) {
1928+
while (budget > 0 && (u = m->dbus_unit_queue)) {
1929+
19011930
assert(u->in_dbus_queue);
19021931

19031932
bus_unit_send_change_signal(u);
1904-
n++;
1933+
n++, budget--;
19051934
}
19061935

1907-
while ((j = m->dbus_job_queue)) {
1936+
while (budget > 0 && (j = m->dbus_job_queue)) {
19081937
assert(j->in_dbus_queue);
19091938

19101939
bus_job_send_change_signal(j);
1911-
n++;
1940+
n++, budget--;
19121941
}
19131942

19141943
m->dispatching_dbus_queue = false;
19151944

1916-
if (m->send_reloading_done) {
1945+
if (budget > 0 && m->send_reloading_done) {
19171946
m->send_reloading_done = false;
1918-
19191947
bus_manager_send_reloading(m, false);
1948+
n++, budget--;
19201949
}
19211950

1922-
if (m->queued_message)
1951+
if (budget > 0 && m->queued_message) {
19231952
bus_send_queued_message(m);
1953+
n++;
1954+
}
19241955

19251956
return n;
19261957
}

src/systemd/sd-bus.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,9 @@ int sd_bus_attach_event(sd_bus *bus, sd_event *e, int priority);
202202
int sd_bus_detach_event(sd_bus *bus);
203203
sd_event *sd_bus_get_event(sd_bus *bus);
204204

205+
int sd_bus_get_n_queued_read(sd_bus *bus, uint64_t *ret);
206+
int sd_bus_get_n_queued_write(sd_bus *bus, uint64_t *ret);
207+
205208
int sd_bus_add_filter(sd_bus *bus, sd_bus_slot **slot, sd_bus_message_handler_t callback, void *userdata);
206209
int sd_bus_add_match(sd_bus *bus, sd_bus_slot **slot, const char *match, sd_bus_message_handler_t callback, void *userdata);
207210
int sd_bus_add_match_async(sd_bus *bus, sd_bus_slot **slot, const char *match, sd_bus_message_handler_t callback, sd_bus_message_handler_t install_callback, void *userdata);

0 commit comments

Comments
 (0)