From f88cbaac9011d381e5b1ab8d2b8fc209c7bb6d1b Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Feb 21 2022 18:35:54 +0000 Subject: [PATCH 1/2] bpf: name unnamed bpf programs bpf-firewall and bpf-devices do not have names. This complicates debugging with bpftool(8). Assign names starting with 'sd_' prefix: * firewall program names are 'sd_fw_ingress' for ingress attach point and 'sd_fw_egress' for egress. * 'sd_devices' for devices prog 'sd_' prefix is already used in source-compiled programs, e.g. sd_restrictif_i, sd_restrictif_e, sd_bind6. The name must not be longer than 15 characters or BPF_OBJ_NAME_LEN - 1. Assign names only to programs loaded to kernel by systemd since programs pinned to bpffs are already loaded. --- diff --git a/src/core/bpf-devices.c b/src/core/bpf-devices.c index 4d86e86..e3100b8 100644 --- a/src/core/bpf-devices.c +++ b/src/core/bpf-devices.c @@ -192,7 +192,7 @@ int bpf_devices_cgroup_init( if (policy == CGROUP_DEVICE_POLICY_AUTO && !allow_list) return 0; - r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &prog); + r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &prog); if (r < 0) return log_error_errno(r, "Loading device control BPF program failed: %m"); @@ -306,7 +306,7 @@ int bpf_devices_supported(void) { return supported = 0; } - r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &program); + r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, NULL, &program); if (r < 0) { log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m"); return supported = 0; diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c index 3c1c02e..8158faf 100644 --- a/src/core/bpf-firewall.c +++ b/src/core/bpf-firewall.c @@ -193,6 +193,7 @@ static int bpf_firewall_compile_bpf( }; _cleanup_(bpf_program_freep) BPFProgram *p = NULL; + const char *prog_name = is_ingress ? 
"sd_fw_ingress" : "sd_fw_egress"; int accounting_map_fd, r; bool access_enabled; @@ -216,7 +217,7 @@ static int bpf_firewall_compile_bpf( return 0; } - r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p); + r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, prog_name, &p); if (r < 0) return r; @@ -604,7 +605,7 @@ static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set _cleanup_(bpf_program_freep) BPFProgram *prog = NULL; int r; - r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &prog); + r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, NULL, &prog); if (r < 0) return log_unit_error_errno(u, r, "Can't allocate CGROUP SKB BPF program: %m"); @@ -825,7 +826,7 @@ int bpf_firewall_supported(void) { return supported = BPF_FIREWALL_UNSUPPORTED; } - r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &program); + r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, NULL, &program); if (r < 0) { bpf_firewall_unsupported_reason = log_debug_errno(r, "Can't allocate CGROUP SKB BPF program, BPF firewalling is not supported: %m"); diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c index b8ca32a..31fa444 100644 --- a/src/shared/bpf-program.c +++ b/src/shared/bpf-program.c @@ -55,6 +55,7 @@ BPFProgram *bpf_program_free(BPFProgram *p) { (void) bpf_program_cgroup_detach(p); safe_close(p->kernel_fd); + free(p->prog_name); free(p->instructions); free(p->attached_path); @@ -78,8 +79,18 @@ static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, u return RET_NERRNO(bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr))); } -int bpf_program_new(uint32_t prog_type, BPFProgram **ret) { +int bpf_program_new(uint32_t prog_type, const char *prog_name, BPFProgram **ret) { _cleanup_(bpf_program_freep) BPFProgram *p = NULL; + _cleanup_free_ char *name = NULL; + + if (prog_name) { + if (strlen(prog_name) >= BPF_OBJ_NAME_LEN) + return -ENAMETOOLONG; + + name = strdup(prog_name); + if (!name) + return -ENOMEM; + } p = new(BPFProgram, 1); if (!p) @@ -88,6 +99,7 @@ int 
bpf_program_new(uint32_t prog_type, BPFProgram **ret) { *p = (BPFProgram) { .prog_type = prog_type, .kernel_fd = -1, + .prog_name = TAKE_PTR(name), }; *ret = TAKE_PTR(p); @@ -165,6 +177,8 @@ int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) { attr.log_buf = PTR_TO_UINT64(log_buf); attr.log_level = !!log_buf; attr.log_size = log_size; + if (p->prog_name) + strncpy(attr.prog_name, p->prog_name, BPF_OBJ_NAME_LEN - 1); p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); if (p->kernel_fd < 0) diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h index e54900f..b640fb9 100644 --- a/src/shared/bpf-program.h +++ b/src/shared/bpf-program.h @@ -20,6 +20,7 @@ struct BPFProgram { /* The loaded BPF program, if loaded */ int kernel_fd; uint32_t prog_type; + char *prog_name; /* The code of it BPF program, if known */ size_t n_instructions; @@ -32,7 +33,7 @@ struct BPFProgram { uint32_t attached_flags; }; -int bpf_program_new(uint32_t prog_type, BPFProgram **ret); +int bpf_program_new(uint32_t prog_type, const char *prog_name, BPFProgram **ret); int bpf_program_new_from_bpffs_path(const char *path, BPFProgram **ret); BPFProgram *bpf_program_free(BPFProgram *p); diff --git a/src/test/test-bpf-firewall.c b/src/test/test-bpf-firewall.c index 2e19db6..cbcb525 100644 --- a/src/test/test-bpf-firewall.c +++ b/src/test/test-bpf-firewall.c @@ -55,7 +55,7 @@ int main(int argc, char *argv[]) { assert_se(set_unit_path(unit_dir) >= 0); assert_se(runtime_dir = setup_fake_runtime_dir()); - r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p); + r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, "sd_trivial", &p); assert_se(r == 0); r = bpf_program_add_instructions(p, exit_insn, ELEMENTSOF(exit_insn)); diff --git a/src/test/test-bpf-foreign-programs.c b/src/test/test-bpf-foreign-programs.c index 1765dc7..8a3d2e0 100644 --- a/src/test/test-bpf-foreign-programs.c +++ b/src/test/test-bpf-foreign-programs.c @@ -162,7 +162,7 @@ static int pin_programs(Unit *u, 
CGroupContext *cc, const Test *test_suite, size if (r < 0) return log_error_errno(r, "Failed to convert program to string"); - r = bpf_program_new(test_suite[i].prog_type, &prog); + r = bpf_program_new(test_suite[i].prog_type, "sd_trivial", &prog); if (r < 0) return log_error_errno(r, "Failed to create program '%s'", str); From 8a8cf1a2594c1837416c24eb5c23052a51b41471 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Feb 21 2022 18:35:54 +0000 Subject: [PATCH 2/2] bpf: load firewall with name only if supported BPF firewall is supported starting from v4.9 kernel where BPF_PROG_TYPE_SOCKET_FILTER support was added [0]. However, program name support was only added in v4.15 [1], and the BPF_PROG_LOAD syscall will fail on older kernels if called with the prog_name attribute. BPF_F_ALLOW_MULTI was also added in the v4.15 kernel, which allows reusing the BPF_F_ALLOW_MULTI probe to indicate that program names are also supported. This is not a problem for BPF_PROG_TYPE_CGROUP_DEVICE, since it was added in v4.15. [0] https://elixir.bootlin.com/linux/v4.9/source/include/uapi/linux/bpf.h#L92 [1] https://elixir.bootlin.com/linux/v4.15/source/include/uapi/linux/bpf.h#L191 Follow-up of https://github.com/systemd/systemd/pull/22214 --- diff --git a/src/core/bpf-devices.c b/src/core/bpf-devices.c index e3100b8..f62c6f1 100644 --- a/src/core/bpf-devices.c +++ b/src/core/bpf-devices.c @@ -306,7 +306,7 @@ int bpf_devices_supported(void) { return supported = 0; } - r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, NULL, &program); + r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &program); if (r < 0) { log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m"); return supported = 0; diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c index 8158faf..0297053 100644 --- a/src/core/bpf-firewall.c +++ b/src/core/bpf-firewall.c @@ -145,6 +145,7 @@ static int add_instructions_for_ip_any( static int bpf_firewall_compile_bpf( Unit *u, + const char 
*prog_name, bool is_ingress, BPFProgram **ret, bool ip_allow_any, @@ -193,7 +194,6 @@ static int bpf_firewall_compile_bpf( }; _cleanup_(bpf_program_freep) BPFProgram *p = NULL; - const char *prog_name = is_ingress ? "sd_fw_ingress" : "sd_fw_egress"; int accounting_map_fd, r; bool access_enabled; @@ -527,9 +527,10 @@ static int bpf_firewall_prepare_accounting_maps(Unit *u, bool enabled, int *fd_i } int bpf_firewall_compile(Unit *u) { + const char *ingress_name = NULL, *egress_name = NULL; + bool ip_allow_any = false, ip_deny_any = false; CGroupContext *cc; int r, supported; - bool ip_allow_any = false, ip_deny_any = false; assert(u); @@ -552,6 +553,13 @@ int bpf_firewall_compile(Unit *u) { return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units."); + /* If BPF_F_ALLOW_MULTI flag is supported program name is also supported (both were added to v4.15 + * kernel). */ + if (supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI) { + ingress_name = "sd_fw_ingress"; + egress_name = "sd_fw_egress"; + } + /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs themselves, * but we reuse the accounting maps. 
That way the firewall in effect always maps to the actual * configuration, but we don't flush out the accounting unnecessarily */ @@ -585,11 +593,11 @@ int bpf_firewall_compile(Unit *u) { if (r < 0) return log_unit_error_errno(u, r, "Preparation of eBPF accounting maps failed: %m"); - r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress, ip_allow_any, ip_deny_any); + r = bpf_firewall_compile_bpf(u, ingress_name, true, &u->ip_bpf_ingress, ip_allow_any, ip_deny_any); if (r < 0) return log_unit_error_errno(u, r, "Compilation for ingress BPF program failed: %m"); - r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress, ip_allow_any, ip_deny_any); + r = bpf_firewall_compile_bpf(u, egress_name, false, &u->ip_bpf_egress, ip_allow_any, ip_deny_any); if (r < 0) return log_unit_error_errno(u, r, "Compilation for egress BPF program failed: %m"); @@ -826,6 +834,7 @@ int bpf_firewall_supported(void) { return supported = BPF_FIREWALL_UNSUPPORTED; } + /* prog_name is NULL since it is supported only starting from v4.15 kernel. */ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, NULL, &program); if (r < 0) { bpf_firewall_unsupported_reason = @@ -883,7 +892,9 @@ int bpf_firewall_supported(void) { /* So now we know that the BPF program is generally available, let's see if BPF_F_ALLOW_MULTI is also supported * (which was added in kernel 4.15). We use a similar logic as before, but this time we use the BPF_PROG_ATTACH * bpf() call and the BPF_F_ALLOW_MULTI flags value. Since the flags are checked early in the system call we'll - * get EINVAL if it's not supported, and EBADF as before if it is available. */ + * get EINVAL if it's not supported, and EBADF as before if it is available. + * Use probe result as the indicator that program name is also supported since they both were + * added in kernel 4.15. */ zero(attr); attr.attach_type = BPF_CGROUP_INET_EGRESS;