Skip to content

Commit efd3be9

Browse files
yuwatapoettering
authored andcommitted
sd-event: re-check new epoll events when a child event is queued
Previously, when a process outputs something and exit just after epoll_wait() but before process_child(), then the IO event is ignored even if the IO event has higher priority. See systemd#18190. This can be solved by checking epoll event again after process_child(). However, there exists a possibility that another process outputs and exits just after process_child() but before the second epoll_wait(). When the IO event has lower priority than the child event, still IO event is processed. So, this makes new epoll events and child events are checked in a loop until no new event is detected. To prevent an infinite loop, the number of maximum trial is set to 10. Fixes systemd#18190.
1 parent e626367 commit efd3be9

File tree

1 file changed

+103
-36
lines changed

1 file changed

+103
-36
lines changed

src/libsystemd/sd-event/sd-event.c

Lines changed: 103 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,7 +1039,7 @@ static int source_set_pending(sd_event_source *s, bool b) {
10391039
}
10401040
}
10411041

1042-
return 0;
1042+
return 1;
10431043
}
10441044

10451045
static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
@@ -3116,11 +3116,19 @@ static int process_timer(
31163116
return 0;
31173117
}
31183118

3119-
static int process_child(sd_event *e) {
3119+
static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) {
3120+
int64_t min_priority = threshold;
3121+
bool something_new = false;
31203122
sd_event_source *s;
31213123
int r;
31223124

31233125
assert(e);
3126+
assert(ret_min_priority);
3127+
3128+
if (!e->need_process_child) {
3129+
*ret_min_priority = min_priority;
3130+
return 0;
3131+
}
31243132

31253133
e->need_process_child = false;
31263134

@@ -3145,6 +3153,9 @@ static int process_child(sd_event *e) {
31453153
HASHMAP_FOREACH(s, e->child_sources) {
31463154
assert(s->type == SOURCE_CHILD);
31473155

3156+
if (s->priority > threshold)
3157+
continue;
3158+
31483159
if (s->pending)
31493160
continue;
31503161

@@ -3181,10 +3192,15 @@ static int process_child(sd_event *e) {
31813192
r = source_set_pending(s, true);
31823193
if (r < 0)
31833194
return r;
3195+
if (r > 0) {
3196+
something_new = true;
3197+
min_priority = MIN(min_priority, s->priority);
3198+
}
31843199
}
31853200
}
31863201

3187-
return 0;
3202+
*ret_min_priority = min_priority;
3203+
return something_new;
31883204
}
31893205

31903206
static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
@@ -3214,13 +3230,13 @@ static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
32143230
return source_set_pending(s, true);
32153231
}
32163232

3217-
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
3218-
bool read_one = false;
3233+
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) {
32193234
int r;
32203235

32213236
assert(e);
32223237
assert(d);
32233238
assert_return(events == EPOLLIN, -EIO);
3239+
assert(min_priority);
32243240

32253241
/* If there's a signal queued on this priority and SIGCHLD is
32263242
on this priority too, then make sure to recheck the
@@ -3246,7 +3262,7 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
32463262
n = read(d->fd, &si, sizeof(si));
32473263
if (n < 0) {
32483264
if (IN_SET(errno, EAGAIN, EINTR))
3249-
return read_one;
3265+
return 0;
32503266

32513267
return -errno;
32523268
}
@@ -3256,8 +3272,6 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
32563272

32573273
assert(SIGNAL_VALID(si.ssi_signo));
32583274

3259-
read_one = true;
3260-
32613275
if (e->signal_sources)
32623276
s = e->signal_sources[si.ssi_signo];
32633277
if (!s)
@@ -3271,12 +3285,16 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
32713285
r = source_set_pending(s, true);
32723286
if (r < 0)
32733287
return r;
3288+
if (r > 0 && *min_priority >= s->priority) {
3289+
*min_priority = s->priority;
3290+
return 1; /* an event source with smaller priority is queued. */
3291+
}
32743292

3275-
return 1;
3293+
return 0;
32763294
}
32773295
}
32783296

3279-
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
3297+
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents, int64_t threshold) {
32803298
ssize_t n;
32813299

32823300
assert(e);
@@ -3292,6 +3310,9 @@ static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t
32923310
if (d->buffer_filled > 0)
32933311
return 0;
32943312

3313+
if (d->priority > threshold)
3314+
return 0;
3315+
32953316
n = read(d->fd, &d->buffer, sizeof(d->buffer));
32963317
if (n < 0) {
32973318
if (IN_SET(errno, EAGAIN, EINTR))
@@ -3831,20 +3852,14 @@ static int epoll_wait_usec(
38313852
return r;
38323853
}
38333854

3834-
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3855+
static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) {
3856+
int64_t min_priority = threshold;
3857+
bool something_new = false;
38353858
size_t n_event_queue, m;
38363859
int r;
38373860

3838-
assert_return(e, -EINVAL);
3839-
assert_return(e = event_resolve(e), -ENOPKG);
3840-
assert_return(!event_pid_changed(e), -ECHILD);
3841-
assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3842-
assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3843-
3844-
if (e->exit_requested) {
3845-
e->state = SD_EVENT_PENDING;
3846-
return 1;
3847-
}
3861+
assert(e);
3862+
assert(ret_min_priority);
38483863

38493864
n_event_queue = MAX(e->n_sources, 1u);
38503865
if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, n_event_queue))
@@ -3856,12 +3871,8 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
38563871

38573872
for (;;) {
38583873
r = epoll_wait_usec(e->epoll_fd, e->event_queue, e->event_queue_allocated, timeout);
3859-
if (r == -EINTR) {
3860-
e->state = SD_EVENT_PENDING;
3861-
return 1;
3862-
}
38633874
if (r < 0)
3864-
goto finish;
3875+
return r;
38653876

38663877
m = (size_t) r;
38673878

@@ -3877,7 +3888,9 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
38773888
timeout = 0;
38783889
}
38793890

3880-
triple_timestamp_get(&e->timestamp);
3891+
/* Set timestamp only when this is called first time. */
3892+
if (threshold == INT64_MAX)
3893+
triple_timestamp_get(&e->timestamp);
38813894

38823895
for (size_t i = 0; i < m; i++) {
38833896

@@ -3893,6 +3906,11 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
38933906

38943907
assert(s);
38953908

3909+
if (s->priority > threshold)
3910+
continue;
3911+
3912+
min_priority = MIN(min_priority, s->priority);
3913+
38963914
switch (s->type) {
38973915

38983916
case SOURCE_IO:
@@ -3920,19 +3938,75 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
39203938
}
39213939

39223940
case WAKEUP_SIGNAL_DATA:
3923-
r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3941+
r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events, &min_priority);
39243942
break;
39253943

39263944
case WAKEUP_INOTIFY_DATA:
3927-
r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3945+
r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events, threshold);
39283946
break;
39293947

39303948
default:
39313949
assert_not_reached("Invalid wake-up pointer");
39323950
}
39333951
}
3952+
if (r < 0)
3953+
return r;
3954+
if (r > 0)
3955+
something_new = true;
3956+
}
3957+
3958+
*ret_min_priority = min_priority;
3959+
return something_new;
3960+
}
3961+
3962+
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3963+
int r;
3964+
3965+
assert_return(e, -EINVAL);
3966+
assert_return(e = event_resolve(e), -ENOPKG);
3967+
assert_return(!event_pid_changed(e), -ECHILD);
3968+
assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3969+
assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3970+
3971+
if (e->exit_requested) {
3972+
e->state = SD_EVENT_PENDING;
3973+
return 1;
3974+
}
3975+
3976+
for (int64_t threshold = INT64_MAX; ; threshold--) {
3977+
int64_t epoll_min_priority, child_min_priority;
3978+
3979+
/* There may be a possibility that new epoll (especially IO) and child events are
3980+
* triggered just after process_epoll() call but before process_child(), and the new IO
3981+
* events may have higher priority than the child events. To salvage these events,
3982+
* let's call epoll_wait() again, but accepts only events with higher priority than the
3983+
* previous. See issue https://github.com/systemd/systemd/issues/18190 and comments
3984+
* https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
3985+
* https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */
3986+
3987+
r = process_epoll(e, timeout, threshold, &epoll_min_priority);
3988+
if (r == -EINTR) {
3989+
e->state = SD_EVENT_PENDING;
3990+
return 1;
3991+
}
3992+
if (r < 0)
3993+
goto finish;
3994+
if (r == 0 && threshold < INT64_MAX)
3995+
/* No new epoll event. */
3996+
break;
3997+
3998+
r = process_child(e, threshold, &child_min_priority);
39343999
if (r < 0)
39354000
goto finish;
4001+
if (r == 0)
4002+
/* No new child event. */
4003+
break;
4004+
4005+
threshold = MIN(epoll_min_priority, child_min_priority);
4006+
if (threshold == INT64_MIN)
4007+
break;
4008+
4009+
timeout = 0;
39364010
}
39374011

39384012
r = process_watchdog(e);
@@ -3959,19 +4033,12 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
39594033
if (r < 0)
39604034
goto finish;
39614035

3962-
if (e->need_process_child) {
3963-
r = process_child(e);
3964-
if (r < 0)
3965-
goto finish;
3966-
}
3967-
39684036
r = process_inotify(e);
39694037
if (r < 0)
39704038
goto finish;
39714039

39724040
if (event_next_pending(e)) {
39734041
e->state = SD_EVENT_PENDING;
3974-
39754042
return 1;
39764043
}
39774044

0 commit comments

Comments
 (0)