Skip to content

Commit f75f613

Browse files
committed
core: reduce the number of stalled PIDs from the watched processes list when possible
Some PIDs can remain in the watched list even though their processes have exited since a long time. It can easily happen if the main process of a forking service manages to spawn a child before the control process exits for example. However when a pid is about to be mapped to a unit by calling unit_watch_pid(), the caller usually knows if the pid should belong to this unit exclusively: if we just forked() off a child, then we can be sure that its PID is otherwise unused. In this case we take this opportunity to remove any stalled PIDs from the watched process list. If we learnt about a PID in any other form (for example via PID file, via searching, MAINPID= and so on), then we can't assume anything.
1 parent 4d05154 commit f75f613

File tree

11 files changed

+43
-28
lines changed

11 files changed

+43
-28
lines changed

src/core/cgroup.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2319,7 +2319,7 @@ static int unit_watch_pids_in_path(Unit *u, const char *path) {
23192319
pid_t pid;
23202320

23212321
while ((r = cg_read_pid(f, &pid)) > 0) {
2322-
r = unit_watch_pid(u, pid);
2322+
r = unit_watch_pid(u, pid, false);
23232323
if (r < 0 && ret >= 0)
23242324
ret = r;
23252325
}

src/core/dbus-scope.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ static int bus_scope_set_transient_property(
106106
return r;
107107

108108
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
109-
r = unit_watch_pid(UNIT(s), pid);
109+
r = unit_watch_pid(UNIT(s), pid, false);
110110
if (r < 0 && r != -EEXIST)
111111
return r;
112112
}

src/core/manager.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2129,6 +2129,16 @@ void manager_clear_jobs(Manager *m) {
21292129
job_finish_and_invalidate(j, JOB_CANCELED, false, false);
21302130
}
21312131

2132+
void manager_unwatch_pid(Manager *m, pid_t pid) {
2133+
assert(m);
2134+
2135+
/* First let's drop the unit keyed as "pid". */
2136+
(void) hashmap_remove(m->watch_pids, PID_TO_PTR(pid));
2137+
2138+
/* Then, let's also drop the array keyed by -pid. */
2139+
free(hashmap_remove(m->watch_pids, PID_TO_PTR(-pid)));
2140+
}
2141+
21322142
static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
21332143
Manager *m = userdata;
21342144
Job *j;

src/core/manager.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,8 @@ int manager_get_dump_string(Manager *m, char **ret);
437437

438438
void manager_clear_jobs(Manager *m);
439439

440+
void manager_unwatch_pid(Manager *m, pid_t pid);
441+
440442
unsigned manager_dispatch_load_queue(Manager *m);
441443

442444
int manager_default_environment(Manager *m);

src/core/mount.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -704,7 +704,7 @@ static int mount_coldplug(Unit *u) {
704704
pid_is_unwaited(m->control_pid) &&
705705
MOUNT_STATE_WITH_PROCESS(new_state)) {
706706

707-
r = unit_watch_pid(UNIT(m), m->control_pid);
707+
r = unit_watch_pid(UNIT(m), m->control_pid, false);
708708
if (r < 0)
709709
return r;
710710

@@ -810,9 +810,8 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
810810
if (r < 0)
811811
return r;
812812

813-
r = unit_watch_pid(UNIT(m), pid);
813+
r = unit_watch_pid(UNIT(m), pid, true);
814814
if (r < 0)
815-
/* FIXME: we need to do something here */
816815
return r;
817816

818817
*_pid = pid;

src/core/service.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -990,7 +990,7 @@ static int service_load_pid_file(Service *s, bool may_warn) {
990990
if (r < 0)
991991
return r;
992992

993-
r = unit_watch_pid(UNIT(s), pid);
993+
r = unit_watch_pid(UNIT(s), pid, false);
994994
if (r < 0) /* FIXME: we need to do something here */
995995
return log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" for service: %m", pid);
996996

@@ -1020,7 +1020,7 @@ static void service_search_main_pid(Service *s) {
10201020
if (service_set_main_pid(s, pid) < 0)
10211021
return;
10221022

1023-
r = unit_watch_pid(UNIT(s), pid);
1023+
r = unit_watch_pid(UNIT(s), pid, false);
10241024
if (r < 0)
10251025
/* FIXME: we need to do something here */
10261026
log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" from: %m", pid);
@@ -1154,7 +1154,7 @@ static int service_coldplug(Unit *u) {
11541154
SERVICE_RUNNING, SERVICE_RELOAD,
11551155
SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
11561156
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
1157-
r = unit_watch_pid(UNIT(s), s->main_pid);
1157+
r = unit_watch_pid(UNIT(s), s->main_pid, false);
11581158
if (r < 0)
11591159
return r;
11601160
}
@@ -1166,7 +1166,7 @@ static int service_coldplug(Unit *u) {
11661166
SERVICE_RELOAD,
11671167
SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
11681168
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
1169-
r = unit_watch_pid(UNIT(s), s->control_pid);
1169+
r = unit_watch_pid(UNIT(s), s->control_pid, false);
11701170
if (r < 0)
11711171
return r;
11721172
}
@@ -1566,8 +1566,8 @@ static int service_spawn(
15661566
s->exec_fd_event_source = TAKE_PTR(exec_fd_source);
15671567
s->exec_fd_hot = false;
15681568

1569-
r = unit_watch_pid(UNIT(s), pid);
1570-
if (r < 0) /* FIXME: we need to do something here */
1569+
r = unit_watch_pid(UNIT(s), pid, true);
1570+
if (r < 0)
15711571
return r;
15721572

15731573
*_pid = pid;
@@ -3705,7 +3705,7 @@ static void service_notify_message(
37053705
if (r > 0) {
37063706
service_set_main_pid(s, new_main_pid);
37073707

3708-
r = unit_watch_pid(UNIT(s), new_main_pid);
3708+
r = unit_watch_pid(UNIT(s), new_main_pid, false);
37093709
if (r < 0)
37103710
log_unit_warning_errno(UNIT(s), r, "Failed to watch new main PID "PID_FMT" for service: %m", new_main_pid);
37113711

@@ -3923,7 +3923,7 @@ static void service_bus_name_owner_change(
39233923
log_unit_debug(u, "D-Bus name %s is now owned by process " PID_FMT, name, pid);
39243924

39253925
service_set_main_pid(s, pid);
3926-
unit_watch_pid(UNIT(s), pid);
3926+
unit_watch_pid(UNIT(s), pid, false);
39273927
}
39283928
}
39293929
}

src/core/socket.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1852,7 +1852,7 @@ static int socket_coldplug(Unit *u) {
18521852
SOCKET_FINAL_SIGTERM,
18531853
SOCKET_FINAL_SIGKILL)) {
18541854

1855-
r = unit_watch_pid(UNIT(s), s->control_pid);
1855+
r = unit_watch_pid(UNIT(s), s->control_pid, false);
18561856
if (r < 0)
18571857
return r;
18581858

@@ -1938,9 +1938,8 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
19381938
if (r < 0)
19391939
return r;
19401940

1941-
r = unit_watch_pid(UNIT(s), pid);
1941+
r = unit_watch_pid(UNIT(s), pid, true);
19421942
if (r < 0)
1943-
/* FIXME: we need to do something here */
19441943
return r;
19451944

19461945
*_pid = pid;
@@ -2009,7 +2008,7 @@ static int socket_chown(Socket *s, pid_t *_pid) {
20092008
_exit(EXIT_SUCCESS);
20102009
}
20112010

2012-
r = unit_watch_pid(UNIT(s), pid);
2011+
r = unit_watch_pid(UNIT(s), pid, true);
20132012
if (r < 0)
20142013
goto fail;
20152014

src/core/swap.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,7 @@ static int swap_coldplug(Unit *u) {
550550
pid_is_unwaited(s->control_pid) &&
551551
SWAP_STATE_WITH_PROCESS(new_state)) {
552552

553-
r = unit_watch_pid(UNIT(s), s->control_pid);
553+
r = unit_watch_pid(UNIT(s), s->control_pid, false);
554554
if (r < 0)
555555
return r;
556556

@@ -657,9 +657,8 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
657657
if (r < 0)
658658
goto fail;
659659

660-
r = unit_watch_pid(UNIT(s), pid);
660+
r = unit_watch_pid(UNIT(s), pid, true);
661661
if (r < 0)
662-
/* FIXME: we need to do something here */
663662
goto fail;
664663

665664
*_pid = pid;

src/core/unit.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2525,14 +2525,20 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
25252525
unit_add_to_gc_queue(u);
25262526
}
25272527

2528-
int unit_watch_pid(Unit *u, pid_t pid) {
2528+
int unit_watch_pid(Unit *u, pid_t pid, bool exclusive) {
25292529
int r;
25302530

25312531
assert(u);
25322532
assert(pid_is_valid(pid));
25332533

25342534
/* Watch a specific PID */
25352535

2536+
/* Caller might be sure that this PID belongs to this unit only. Let's take this
2537+
* opportunity to remove any stalled references to this PID as they can be created
2538+
* easily (when watching a process which is not our direct child). */
2539+
if (exclusive)
2540+
manager_unwatch_pid(u->manager, pid);
2541+
25362542
r = set_ensure_allocated(&u->pids, NULL);
25372543
if (r < 0)
25382544
return r;

src/core/unit.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,7 @@ typedef enum UnitNotifyFlags {
678678

679679
void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlags flags);
680680

681-
int unit_watch_pid(Unit *u, pid_t pid);
681+
int unit_watch_pid(Unit *u, pid_t pid, bool exclusive);
682682
void unit_unwatch_pid(Unit *u, pid_t pid);
683683
void unit_unwatch_all_pids(Unit *u);
684684

0 commit comments

Comments
 (0)