@@ -1039,7 +1039,7 @@ static int source_set_pending(sd_event_source *s, bool b) {
10391039 }
10401040 }
10411041
1042- return 0 ;
1042+ return 1 ;
10431043}
10441044
10451045static sd_event_source * source_new (sd_event * e , bool floating , EventSourceType type ) {
@@ -3116,11 +3116,19 @@ static int process_timer(
31163116 return 0 ;
31173117}
31183118
3119- static int process_child (sd_event * e ) {
3119+ static int process_child (sd_event * e , int64_t threshold , int64_t * ret_min_priority ) {
3120+ int64_t min_priority = threshold ;
3121+ bool something_new = false;
31203122 sd_event_source * s ;
31213123 int r ;
31223124
31233125 assert (e );
3126+ assert (ret_min_priority );
3127+
3128+ if (!e -> need_process_child ) {
3129+ * ret_min_priority = min_priority ;
3130+ return 0 ;
3131+ }
31243132
31253133 e -> need_process_child = false;
31263134
@@ -3145,6 +3153,9 @@ static int process_child(sd_event *e) {
31453153 HASHMAP_FOREACH (s , e -> child_sources ) {
31463154 assert (s -> type == SOURCE_CHILD );
31473155
3156+ if (s -> priority > threshold )
3157+ continue ;
3158+
31483159 if (s -> pending )
31493160 continue ;
31503161
@@ -3181,10 +3192,15 @@ static int process_child(sd_event *e) {
31813192 r = source_set_pending (s , true);
31823193 if (r < 0 )
31833194 return r ;
3195+ if (r > 0 ) {
3196+ something_new = true;
3197+ min_priority = MIN (min_priority , s -> priority );
3198+ }
31843199 }
31853200 }
31863201
3187- return 0 ;
3202+ * ret_min_priority = min_priority ;
3203+ return something_new ;
31883204}
31893205
31903206static int process_pidfd (sd_event * e , sd_event_source * s , uint32_t revents ) {
@@ -3214,13 +3230,13 @@ static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
32143230 return source_set_pending (s , true);
32153231}
32163232
3217- static int process_signal (sd_event * e , struct signal_data * d , uint32_t events ) {
3218- bool read_one = false;
3233+ static int process_signal (sd_event * e , struct signal_data * d , uint32_t events , int64_t * min_priority ) {
32193234 int r ;
32203235
32213236 assert (e );
32223237 assert (d );
32233238 assert_return (events == EPOLLIN , - EIO );
3239+ assert (min_priority );
32243240
32253241 /* If there's a signal queued on this priority and SIGCHLD is
32263242 on this priority too, then make sure to recheck the
@@ -3246,7 +3262,7 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
32463262 n = read (d -> fd , & si , sizeof (si ));
32473263 if (n < 0 ) {
32483264 if (IN_SET (errno , EAGAIN , EINTR ))
3249- return read_one ;
3265+ return 0 ;
32503266
32513267 return - errno ;
32523268 }
@@ -3256,8 +3272,6 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
32563272
32573273 assert (SIGNAL_VALID (si .ssi_signo ));
32583274
3259- read_one = true;
3260-
32613275 if (e -> signal_sources )
32623276 s = e -> signal_sources [si .ssi_signo ];
32633277 if (!s )
@@ -3271,12 +3285,16 @@ static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
32713285 r = source_set_pending (s , true);
32723286 if (r < 0 )
32733287 return r ;
3288+ if (r > 0 && * min_priority >= s -> priority ) {
3289+ * min_priority = s -> priority ;
3290+ return 1 ; /* an event source with smaller priority is queued. */
3291+ }
32743292
3275- return 1 ;
3293+ return 0 ;
32763294 }
32773295}
32783296
3279- static int event_inotify_data_read (sd_event * e , struct inotify_data * d , uint32_t revents ) {
3297+ static int event_inotify_data_read (sd_event * e , struct inotify_data * d , uint32_t revents , int64_t threshold ) {
32803298 ssize_t n ;
32813299
32823300 assert (e );
@@ -3292,6 +3310,9 @@ static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t
32923310 if (d -> buffer_filled > 0 )
32933311 return 0 ;
32943312
3313+ if (d -> priority > threshold )
3314+ return 0 ;
3315+
32953316 n = read (d -> fd , & d -> buffer , sizeof (d -> buffer ));
32963317 if (n < 0 ) {
32973318 if (IN_SET (errno , EAGAIN , EINTR ))
@@ -3831,20 +3852,14 @@ static int epoll_wait_usec(
38313852 return r ;
38323853}
38333854
3834- _public_ int sd_event_wait (sd_event * e , uint64_t timeout ) {
3855+ static int process_epoll (sd_event * e , usec_t timeout , int64_t threshold , int64_t * ret_min_priority ) {
3856+ int64_t min_priority = threshold ;
3857+ bool something_new = false;
38353858 size_t n_event_queue , m ;
38363859 int r ;
38373860
3838- assert_return (e , - EINVAL );
3839- assert_return (e = event_resolve (e ), - ENOPKG );
3840- assert_return (!event_pid_changed (e ), - ECHILD );
3841- assert_return (e -> state != SD_EVENT_FINISHED , - ESTALE );
3842- assert_return (e -> state == SD_EVENT_ARMED , - EBUSY );
3843-
3844- if (e -> exit_requested ) {
3845- e -> state = SD_EVENT_PENDING ;
3846- return 1 ;
3847- }
3861+ assert (e );
3862+ assert (ret_min_priority );
38483863
38493864 n_event_queue = MAX (e -> n_sources , 1u );
38503865 if (!GREEDY_REALLOC (e -> event_queue , e -> event_queue_allocated , n_event_queue ))
@@ -3856,12 +3871,8 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
38563871
38573872 for (;;) {
38583873 r = epoll_wait_usec (e -> epoll_fd , e -> event_queue , e -> event_queue_allocated , timeout );
3859- if (r == - EINTR ) {
3860- e -> state = SD_EVENT_PENDING ;
3861- return 1 ;
3862- }
38633874 if (r < 0 )
3864- goto finish ;
3875+ return r ;
38653876
38663877 m = (size_t ) r ;
38673878
@@ -3877,7 +3888,9 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
38773888 timeout = 0 ;
38783889 }
38793890
3880- triple_timestamp_get (& e -> timestamp );
3891+ /* Set timestamp only when this is called first time. */
3892+ if (threshold == INT64_MAX )
3893+ triple_timestamp_get (& e -> timestamp );
38813894
38823895 for (size_t i = 0 ; i < m ; i ++ ) {
38833896
@@ -3893,6 +3906,11 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
38933906
38943907 assert (s );
38953908
3909+ if (s -> priority > threshold )
3910+ continue ;
3911+
3912+ min_priority = MIN (min_priority , s -> priority );
3913+
38963914 switch (s -> type ) {
38973915
38983916 case SOURCE_IO :
@@ -3920,19 +3938,75 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
39203938 }
39213939
39223940 case WAKEUP_SIGNAL_DATA :
3923- r = process_signal (e , e -> event_queue [i ].data .ptr , e -> event_queue [i ].events );
3941+ r = process_signal (e , e -> event_queue [i ].data .ptr , e -> event_queue [i ].events , & min_priority );
39243942 break ;
39253943
39263944 case WAKEUP_INOTIFY_DATA :
3927- r = event_inotify_data_read (e , e -> event_queue [i ].data .ptr , e -> event_queue [i ].events );
3945+ r = event_inotify_data_read (e , e -> event_queue [i ].data .ptr , e -> event_queue [i ].events , threshold );
39283946 break ;
39293947
39303948 default :
39313949 assert_not_reached ("Invalid wake-up pointer" );
39323950 }
39333951 }
3952+ if (r < 0 )
3953+ return r ;
3954+ if (r > 0 )
3955+ something_new = true;
3956+ }
3957+
3958+ * ret_min_priority = min_priority ;
3959+ return something_new ;
3960+ }
3961+
3962+ _public_ int sd_event_wait (sd_event * e , uint64_t timeout ) {
3963+ int r ;
3964+
3965+ assert_return (e , - EINVAL );
3966+ assert_return (e = event_resolve (e ), - ENOPKG );
3967+ assert_return (!event_pid_changed (e ), - ECHILD );
3968+ assert_return (e -> state != SD_EVENT_FINISHED , - ESTALE );
3969+ assert_return (e -> state == SD_EVENT_ARMED , - EBUSY );
3970+
3971+ if (e -> exit_requested ) {
3972+ e -> state = SD_EVENT_PENDING ;
3973+ return 1 ;
3974+ }
3975+
3976+ for (int64_t threshold = INT64_MAX ; ; threshold -- ) {
3977+ int64_t epoll_min_priority , child_min_priority ;
3978+
3979+ /* There may be a possibility that new epoll (especially IO) and child events are
3980+ * triggered just after process_epoll() call but before process_child(), and the new IO
3981+ * events may have higher priority than the child events. To salvage these events,
3982+ * let's call epoll_wait() again, but accepts only events with higher priority than the
3983+ * previous. See issue https://github.com/systemd/systemd/issues/18190 and comments
3984+ * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
3985+ * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */
3986+
3987+ r = process_epoll (e , timeout , threshold , & epoll_min_priority );
3988+ if (r == - EINTR ) {
3989+ e -> state = SD_EVENT_PENDING ;
3990+ return 1 ;
3991+ }
3992+ if (r < 0 )
3993+ goto finish ;
3994+ if (r == 0 && threshold < INT64_MAX )
3995+ /* No new epoll event. */
3996+ break ;
3997+
3998+ r = process_child (e , threshold , & child_min_priority );
39343999 if (r < 0 )
39354000 goto finish ;
4001+ if (r == 0 )
4002+ /* No new child event. */
4003+ break ;
4004+
4005+ threshold = MIN (epoll_min_priority , child_min_priority );
4006+ if (threshold == INT64_MIN )
4007+ break ;
4008+
4009+ timeout = 0 ;
39364010 }
39374011
39384012 r = process_watchdog (e );
@@ -3959,19 +4033,12 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
39594033 if (r < 0 )
39604034 goto finish ;
39614035
3962- if (e -> need_process_child ) {
3963- r = process_child (e );
3964- if (r < 0 )
3965- goto finish ;
3966- }
3967-
39684036 r = process_inotify (e );
39694037 if (r < 0 )
39704038 goto finish ;
39714039
39724040 if (event_next_pending (e )) {
39734041 e -> state = SD_EVENT_PENDING ;
3974-
39754042 return 1 ;
39764043 }
39774044
0 commit comments