Skip to content

Commit 76dc172

Browse files
alexlzhupoettering
authored andcommitted
core: remove refcount for bpf program
Currently ref count of bpf-program is kept in user space. However, the kernel already implements its own ref count. Thus the ref count we keep for bpf-program is redundant. This PR removes ref count for bpf program as part of a task to simplify bpf-program and remove redundancies, which will make the switch to code-compiled BPF programs easier. Part of systemd#19270
1 parent d92681a commit 76dc172

File tree

11 files changed

+79
-92
lines changed

11 files changed

+79
-92
lines changed

src/core/bpf-devices.c

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ int bpf_devices_cgroup_init(
184184
offsetof(struct bpf_cgroup_dev_ctx, minor)),
185185
};
186186

187-
_cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
187+
_cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
188188
int r;
189189

190190
assert(ret);
@@ -208,7 +208,7 @@ int bpf_devices_cgroup_init(
208208
}
209209

210210
int bpf_devices_apply_policy(
211-
BPFProgram *prog,
211+
BPFProgram **prog,
212212
CGroupDevicePolicy policy,
213213
bool allow_list,
214214
const char *cgroup_path,
@@ -219,7 +219,8 @@ int bpf_devices_apply_policy(
219219

220220
/* This will assign *prog_installed if everything goes well. */
221221

222-
if (!prog)
222+
assert(prog);
223+
if (!*prog)
223224
goto finish;
224225

225226
const bool deny_everything = policy == CGROUP_DEVICE_POLICY_STRICT && !allow_list;
@@ -237,37 +238,37 @@ int bpf_devices_apply_policy(
237238
};
238239

239240
if (!deny_everything) {
240-
r = bpf_program_add_instructions(prog, post_insn, ELEMENTSOF(post_insn));
241+
r = bpf_program_add_instructions(*prog, post_insn, ELEMENTSOF(post_insn));
241242
if (r < 0)
242243
return log_error_errno(r, "Extending device control BPF program failed: %m");
243244

244245
/* Fixup PASS_JUMP_OFF jump offsets. */
245-
for (size_t off = 0; off < prog->n_instructions; off++) {
246-
struct bpf_insn *ins = &prog->instructions[off];
246+
for (size_t off = 0; off < (*prog)->n_instructions; off++) {
247+
struct bpf_insn *ins = &((*prog)->instructions[off]);
247248

248249
if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF)
249-
ins->off = prog->n_instructions - off - 1;
250+
ins->off = (*prog)->n_instructions - off - 1;
250251
}
251252
}
252253

253-
r = bpf_program_add_instructions(prog, exit_insn, ELEMENTSOF(exit_insn));
254+
r = bpf_program_add_instructions(*prog, exit_insn, ELEMENTSOF(exit_insn));
254255
if (r < 0)
255256
return log_error_errno(r, "Extending device control BPF program failed: %m");
256257

257258
r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, NULL, &controller_path);
258259
if (r < 0)
259260
return log_error_errno(r, "Failed to determine cgroup path: %m");
260261

261-
r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, controller_path, BPF_F_ALLOW_MULTI);
262+
r = bpf_program_cgroup_attach(*prog, BPF_CGROUP_DEVICE, controller_path, BPF_F_ALLOW_MULTI);
262263
if (r < 0)
263264
return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m",
264265
empty_to_root(cgroup_path));
265266

266267
finish:
267268
/* Unref the old BPF program (which will implicitly detach it) right before attaching the new program. */
268269
if (prog_installed) {
269-
bpf_program_unref(*prog_installed);
270-
*prog_installed = bpf_program_ref(prog);
270+
bpf_program_free(*prog_installed);
271+
*prog_installed = TAKE_PTR(*prog);
271272
}
272273
return 0;
273274
}
@@ -278,7 +279,7 @@ int bpf_devices_supported(void) {
278279
BPF_EXIT_INSN()
279280
};
280281

281-
_cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
282+
_cleanup_(bpf_program_freep) BPFProgram *program = NULL;
282283
static int supported = -1;
283284
int r;
284285

src/core/bpf-devices.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ typedef struct BPFProgram BPFProgram;
99

1010
int bpf_devices_cgroup_init(BPFProgram **ret, CGroupDevicePolicy policy, bool allow_list);
1111
int bpf_devices_apply_policy(
12-
BPFProgram *prog,
12+
BPFProgram **prog,
1313
CGroupDevicePolicy policy,
1414
bool allow_list,
1515
const char *cgroup_path,

src/core/bpf-firewall.c

Lines changed: 18 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ static int bpf_firewall_compile_bpf(
192192
BPF_MOV64_IMM(BPF_REG_0, 0),
193193
};
194194

195-
_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
195+
_cleanup_(bpf_program_freep) BPFProgram *p = NULL;
196196
int accounting_map_fd, r;
197197
bool access_enabled;
198198

@@ -555,8 +555,8 @@ int bpf_firewall_compile(Unit *u) {
555555
* but we reuse the accounting maps. That way the firewall in effect always maps to the actual
556556
* configuration, but we don't flush out the accounting unnecessarily */
557557

558-
u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
559-
u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);
558+
u->ip_bpf_ingress = bpf_program_free(u->ip_bpf_ingress);
559+
u->ip_bpf_egress = bpf_program_free(u->ip_bpf_egress);
560560

561561
u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
562562
u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
@@ -601,7 +601,7 @@ static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set
601601
set_clear(*set);
602602

603603
STRV_FOREACH(bpf_fs_path, filter_paths) {
604-
_cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
604+
_cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
605605
int r;
606606

607607
r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &prog);
@@ -657,25 +657,18 @@ static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, S
657657
assert(u);
658658

659659
set_clear(*set_installed);
660+
set_ensure_allocated(set_installed, &bpf_program_hash_ops);
660661

661-
SET_FOREACH(prog, *set) {
662+
SET_FOREACH_MOVE(prog, *set_installed, *set) {
662663
r = bpf_program_cgroup_attach(prog, attach_type, path, BPF_F_ALLOW_MULTI);
663664
if (r < 0)
664665
return log_unit_error_errno(u, r, "Attaching custom egress BPF program to cgroup %s failed: %m", path);
665-
666-
/* Remember that these BPF programs are installed now. */
667-
r = set_ensure_put(set_installed, &bpf_program_hash_ops, prog);
668-
if (r < 0)
669-
return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
670-
671-
bpf_program_ref(prog);
672666
}
673-
674667
return 0;
675668
}
676669

677670
int bpf_firewall_install(Unit *u) {
678-
_cleanup_(bpf_program_unrefp) BPFProgram *ip_bpf_ingress_uninstall = NULL, *ip_bpf_egress_uninstall = NULL;
671+
_cleanup_(bpf_program_freep) BPFProgram *ip_bpf_ingress_uninstall = NULL, *ip_bpf_egress_uninstall = NULL;
679672
_cleanup_free_ char *path = NULL;
680673
CGroupContext *cc;
681674
int r, supported;
@@ -719,8 +712,8 @@ int bpf_firewall_install(Unit *u) {
719712
/* If we don't have BPF_F_ALLOW_MULTI then unref the old BPF programs (which will implicitly
720713
* detach them) right before attaching the new program, to minimize the time window when we
721714
* don't account for IP traffic. */
722-
u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed);
723-
u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
715+
u->ip_bpf_egress_installed = bpf_program_free(u->ip_bpf_egress_installed);
716+
u->ip_bpf_ingress_installed = bpf_program_free(u->ip_bpf_ingress_installed);
724717
}
725718

726719
if (u->ip_bpf_egress) {
@@ -729,20 +722,20 @@ int bpf_firewall_install(Unit *u) {
729722
return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path);
730723

731724
/* Remember that this BPF program is installed now. */
732-
u->ip_bpf_egress_installed = bpf_program_ref(u->ip_bpf_egress);
725+
u->ip_bpf_egress_installed = TAKE_PTR(u->ip_bpf_egress);
733726
}
734727

735728
if (u->ip_bpf_ingress) {
736729
r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
737730
if (r < 0)
738731
return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
739732

740-
u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress);
733+
u->ip_bpf_ingress_installed = TAKE_PTR(u->ip_bpf_ingress);
741734
}
742735

743736
/* And now, definitely get rid of the old programs, and detach them */
744-
ip_bpf_egress_uninstall = bpf_program_unref(ip_bpf_egress_uninstall);
745-
ip_bpf_ingress_uninstall = bpf_program_unref(ip_bpf_ingress_uninstall);
737+
ip_bpf_egress_uninstall = bpf_program_free(ip_bpf_egress_uninstall);
738+
ip_bpf_ingress_uninstall = bpf_program_free(ip_bpf_ingress_uninstall);
746739

747740
r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed);
748741
if (r < 0)
@@ -806,7 +799,7 @@ int bpf_firewall_supported(void) {
806799
BPF_EXIT_INSN()
807800
};
808801

809-
_cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
802+
_cleanup_(bpf_program_freep) BPFProgram *program = NULL;
810803
static int supported = -1;
811804
union bpf_attr attr;
812805
int r;
@@ -936,10 +929,10 @@ void bpf_firewall_close(Unit *u) {
936929
u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
937930
u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
938931

939-
u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
940-
u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
941-
u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);
942-
u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed);
932+
u->ip_bpf_ingress = bpf_program_free(u->ip_bpf_ingress);
933+
u->ip_bpf_ingress_installed = bpf_program_free(u->ip_bpf_ingress_installed);
934+
u->ip_bpf_egress = bpf_program_free(u->ip_bpf_egress);
935+
u->ip_bpf_egress_installed = bpf_program_free(u->ip_bpf_egress_installed);
943936

944937
u->ip_bpf_custom_ingress = set_free(u->ip_bpf_custom_ingress);
945938
u->ip_bpf_custom_egress = set_free(u->ip_bpf_custom_egress);

src/core/bpf-foreign.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ static void bpf_foreign_key_hash_func(const BPFForeignKey *p, struct siphash *h)
4949

5050
DEFINE_PRIVATE_HASH_OPS_FULL(bpf_foreign_by_key_hash_ops,
5151
BPFForeignKey, bpf_foreign_key_hash_func, bpf_foreign_key_compare_func, free,
52-
BPFProgram, bpf_program_unref);
52+
BPFProgram, bpf_program_free);
5353

5454
static int attach_programs(Unit *u, const char *path, Hashmap* foreign_by_key, uint32_t attach_flags) {
5555
const BPFForeignKey *key;
@@ -76,7 +76,7 @@ static int bpf_foreign_prepare(
7676
Unit *u,
7777
enum bpf_attach_type attach_type,
7878
const char *bpffs_path) {
79-
_cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
79+
_cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
8080
_cleanup_free_ BPFForeignKey *key = NULL;
8181
uint32_t prog_id;
8282
int r;

src/core/cgroup.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,7 +1170,7 @@ static void cgroup_apply_restrict_network_interfaces(Unit *u) {
11701170
}
11711171

11721172
static int cgroup_apply_devices(Unit *u) {
1173-
_cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
1173+
_cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
11741174
const char *path;
11751175
CGroupContext *c;
11761176
CGroupDeviceAllow *a;
@@ -1244,7 +1244,7 @@ static int cgroup_apply_devices(Unit *u) {
12441244
policy = CGROUP_DEVICE_POLICY_STRICT;
12451245
}
12461246

1247-
r = bpf_devices_apply_policy(prog, policy, any, path, &u->bpf_device_control_installed);
1247+
r = bpf_devices_apply_policy(&prog, policy, any, path, &u->bpf_device_control_installed);
12481248
if (r < 0) {
12491249
static bool warned = false;
12501250

@@ -2767,7 +2767,7 @@ void unit_prune_cgroup(Unit *u) {
27672767
u->cgroup_realized_mask = 0;
27682768
u->cgroup_enabled_mask = 0;
27692769

2770-
u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
2770+
u->bpf_device_control_installed = bpf_program_free(u->bpf_device_control_installed);
27712771
}
27722772

27732773
int unit_search_main_pid(Unit *u, pid_t *ret) {

src/core/unit.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -770,7 +770,7 @@ Unit* unit_free(Unit *u) {
770770

771771
hashmap_free(u->bpf_foreign_by_key);
772772

773-
bpf_program_unref(u->bpf_device_control_installed);
773+
bpf_program_free(u->bpf_device_control_installed);
774774

775775
#if BPF_FRAMEWORK
776776
bpf_link_free(u->restrict_ifaces_ingress_bpf_link);

src/shared/bpf-program.c

Lines changed: 25 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,27 @@ static const char *const bpf_cgroup_attach_type_table[__MAX_BPF_ATTACH_TYPE] = {
3838

3939
DEFINE_STRING_TABLE_LOOKUP(bpf_cgroup_attach_type, int);
4040

41-
DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(bpf_program_hash_ops, void, trivial_hash_func, trivial_compare_func, bpf_program_unref);
41+
DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(bpf_program_hash_ops, void, trivial_hash_func, trivial_compare_func, bpf_program_free);
42+
43+
BPFProgram *bpf_program_free(BPFProgram *p) {
44+
if (!p)
45+
return NULL;
46+
/* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
47+
* fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
48+
* programs that attached one of their BPF programs to a cgroup will leave this program pinned for good with
49+
* zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
50+
* question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
51+
* operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
52+
* counter this, we track closely to which cgroup a program was attached to and will detach it on our own
53+
* whenever we close the BPF fd. */
54+
(void) bpf_program_cgroup_detach(p);
55+
56+
safe_close(p->kernel_fd);
57+
free(p->instructions);
58+
free(p->attached_path);
59+
60+
return mfree(p);
61+
}
4262

4363
/* struct bpf_prog_info info must be initialized since its value is both input and output
4464
* for BPF_OBJ_GET_INFO_BY_FD syscall. */
@@ -61,14 +81,13 @@ static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, u
6181
}
6282

6383
int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
64-
_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
84+
_cleanup_(bpf_program_freep) BPFProgram *p = NULL;
6585

6686
p = new(BPFProgram, 1);
6787
if (!p)
6888
return -ENOMEM;
6989

7090
*p = (BPFProgram) {
71-
.n_ref = 1,
7291
.prog_type = prog_type,
7392
.kernel_fd = -1,
7493
};
@@ -79,7 +98,7 @@ int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
7998
}
8099

81100
int bpf_program_new_from_bpffs_path(const char *path, BPFProgram **ret) {
82-
_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
101+
_cleanup_(bpf_program_freep) BPFProgram *p = NULL;
83102
struct bpf_prog_info info = {};
84103
int r;
85104

@@ -92,7 +111,6 @@ int bpf_program_new_from_bpffs_path(const char *path, BPFProgram **ret) {
92111

93112
*p = (BPFProgram) {
94113
.prog_type = BPF_PROG_TYPE_UNSPEC,
95-
.n_ref = 1,
96114
.kernel_fd = -1,
97115
};
98116

@@ -110,27 +128,6 @@ int bpf_program_new_from_bpffs_path(const char *path, BPFProgram **ret) {
110128
return 0;
111129
}
112130

113-
static BPFProgram *bpf_program_free(BPFProgram *p) {
114-
assert(p);
115-
116-
/* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
117-
* fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
118-
* programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with
119-
* zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
120-
* question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
121-
* operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
122-
* counter this, we track closely to which cgroup a program was attached to and will detach it on our own
123-
* whenever we close the BPF fd. */
124-
(void) bpf_program_cgroup_detach(p);
125-
126-
safe_close(p->kernel_fd);
127-
free(p->instructions);
128-
free(p->attached_path);
129-
130-
return mfree(p);
131-
}
132-
133-
DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free);
134131

135132
int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
136133

@@ -424,7 +421,7 @@ int bpf_program_serialize_attachment_set(FILE *f, FDSet *fds, const char *key, S
424421

425422
int bpf_program_deserialize_attachment(const char *v, FDSet *fds, BPFProgram **bpfp) {
426423
_cleanup_free_ char *sfd = NULL, *sat = NULL, *unescaped = NULL;
427-
_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
424+
_cleanup_(bpf_program_freep) BPFProgram *p = NULL;
428425
_cleanup_close_ int fd = -1;
429426
ssize_t l;
430427
int ifd, at, r;
@@ -470,15 +467,14 @@ int bpf_program_deserialize_attachment(const char *v, FDSet *fds, BPFProgram **b
470467
return -ENOMEM;
471468

472469
*p = (BPFProgram) {
473-
.n_ref = 1,
474470
.kernel_fd = TAKE_FD(fd),
475471
.prog_type = BPF_PROG_TYPE_UNSPEC,
476472
.attached_path = TAKE_PTR(unescaped),
477473
.attached_type = at,
478474
};
479475

480476
if (*bpfp)
481-
bpf_program_unref(*bpfp);
477+
bpf_program_free(*bpfp);
482478

483479
*bpfp = TAKE_PTR(p);
484480
return 0;

0 commit comments

Comments
 (0)