Skip to content

Commit a1f4fd3

Browse files
committed
udev: run the main process, workers, and spawned commands in /udev subcgroup
Also enable cgroup delegation for udevd. With delegation enabled, processes invoked through ExecReload= are assigned to the .control subcgroup, and so they are not killed by cg_kill(). Fixes systemd#16867 and systemd#22686.
1 parent 4267084 commit a1f4fd3

File tree

2 files changed

+59
-18
lines changed

2 files changed

+59
-18
lines changed

src/udev/udevd.c

Lines changed: 58 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "sd-event.h"
2929

3030
#include "alloc-util.h"
31+
#include "cgroup-setup.h"
3132
#include "cgroup-util.h"
3233
#include "cpu-set-util.h"
3334
#include "dev-setup.h"
@@ -48,6 +49,7 @@
4849
#include "mkdir.h"
4950
#include "netlink-util.h"
5051
#include "parse-util.h"
52+
#include "path-util.h"
5153
#include "pretty-print.h"
5254
#include "proc-cmdline.h"
5355
#include "process-util.h"
@@ -85,7 +87,7 @@ typedef struct Manager {
8587
sd_event *event;
8688
Hashmap *workers;
8789
LIST_HEAD(Event, events);
88-
const char *cgroup;
90+
char *cgroup;
8991
pid_t pid; /* the process that originally allocated the manager object */
9092
int log_level;
9193

@@ -238,6 +240,7 @@ static Manager* manager_free(Manager *manager) {
238240
safe_close(manager->inotify_fd);
239241
safe_close_pair(manager->worker_watch);
240242

243+
free(manager->cgroup);
241244
return mfree(manager);
242245
}
243246

@@ -1722,20 +1725,71 @@ static int parse_argv(int argc, char *argv[]) {
17221725
return 1;
17231726
}
17241727

1725-
static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cgroup) {
1728+
static int create_subcgroup(char **ret) {
1729+
_cleanup_free_ char *cgroup = NULL, *subcgroup = NULL;
1730+
int r;
1731+
1732+
if (getppid() != 1)
1733+
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Not invoked by PID1.");
1734+
1735+
r = sd_booted();
1736+
if (r < 0)
1737+
return log_debug_errno(r, "Failed to check if systemd is running: %m");
1738+
if (r == 0)
1739+
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "systemd is not running.");
1740+
1741+
/* Get our own cgroup, we regularly kill everything udev has left behind.
1742+
* We only do this on systemd systems, and only if we are directly spawned
1743+
* by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
1744+
1745+
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
1746+
if (r < 0) {
1747+
if (IN_SET(r, -ENOENT, -ENOMEDIUM))
1748+
return log_debug_errno(r, "Dedicated cgroup not found: %m");
1749+
return log_debug_errno(r, "Failed to get cgroup: %m");
1750+
}
1751+
1752+
r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, cgroup, "trusted.delegate");
1753+
if (IN_SET(r, 0, -ENODATA))
1754+
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "The cgroup %s is not delegated to us.", cgroup);
1755+
if (r < 0)
1756+
return log_debug_errno(r, "Failed to read trusted.delegate attribute: %m");
1757+
1758+
/* We are invoked with our own delegated cgroup tree, let's move us one level down, so that we
1759+
* don't collide with the "no processes in inner nodes" rule of cgroups, when the service
1760+
* manager invokes the ExecReload= job in the .control/ subcgroup. */
1761+
1762+
subcgroup = path_join(cgroup, "/udev");
1763+
if (!subcgroup)
1764+
return log_oom_debug();
1765+
1766+
r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup, 0);
1767+
if (r < 0)
1768+
return log_debug_errno(r, "Failed to create %s subcgroup: %m", subcgroup);
1769+
1770+
log_debug("Created %s subcgroup.", subcgroup);
1771+
if (ret)
1772+
*ret = TAKE_PTR(subcgroup);
1773+
return 0;
1774+
}
1775+
1776+
static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent) {
17261777
_cleanup_(manager_freep) Manager *manager = NULL;
1778+
_cleanup_free_ char *cgroup = NULL;
17271779
int r;
17281780

17291781
assert(ret);
17301782

1783+
(void) create_subcgroup(&cgroup);
1784+
17311785
manager = new(Manager, 1);
17321786
if (!manager)
17331787
return log_oom();
17341788

17351789
*manager = (Manager) {
17361790
.inotify_fd = -1,
17371791
.worker_watch = { -1, -1 },
1738-
.cgroup = cgroup,
1792+
.cgroup = TAKE_PTR(cgroup),
17391793
};
17401794

17411795
r = udev_ctrl_new_from_fd(&manager->ctrl, fd_ctrl);
@@ -1880,7 +1934,6 @@ static int main_loop(Manager *manager) {
18801934
}
18811935

18821936
int run_udevd(int argc, char *argv[]) {
1883-
_cleanup_free_ char *cgroup = NULL;
18841937
_cleanup_(manager_freep) Manager *manager = NULL;
18851938
int fd_ctrl = -1, fd_uevent = -1;
18861939
int r;
@@ -1937,24 +1990,11 @@ int run_udevd(int argc, char *argv[]) {
19371990
if (r < 0 && r != -EEXIST)
19381991
return log_error_errno(r, "Failed to create /run/udev: %m");
19391992

1940-
if (getppid() == 1 && sd_booted() > 0) {
1941-
/* Get our own cgroup, we regularly kill everything udev has left behind.
1942-
* We only do this on systemd systems, and only if we are directly spawned
1943-
* by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
1944-
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
1945-
if (r < 0) {
1946-
if (IN_SET(r, -ENOENT, -ENOMEDIUM))
1947-
log_debug_errno(r, "Dedicated cgroup not found: %m");
1948-
else
1949-
log_warning_errno(r, "Failed to get cgroup: %m");
1950-
}
1951-
}
1952-
19531993
r = listen_fds(&fd_ctrl, &fd_uevent);
19541994
if (r < 0)
19551995
return log_error_errno(r, "Failed to listen on fds: %m");
19561996

1957-
r = manager_new(&manager, fd_ctrl, fd_uevent, cgroup);
1997+
r = manager_new(&manager, fd_ctrl, fd_uevent);
19581998
if (r < 0)
19591999
return log_error_errno(r, "Failed to create manager: %m");
19602000

units/systemd-udevd.service.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Before=sysinit.target
1616
ConditionPathIsReadWrite=/sys
1717

1818
[Service]
19+
Delegate=pids
1920
DeviceAllow=block-* rwm
2021
DeviceAllow=char-* rwm
2122
Type=notify

0 commit comments

Comments
 (0)