From bae141f54be83b06652c1d47e50e4e75ed4e9c7e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 6 Dec 2019 22:49:34 +0100 Subject: bpf: Emit audit messages upon successful prog load and unload Allow for audit messages to be emitted upon BPF program load and unload for having a timeline of events. The load itself is in syscall context, so additional info about the process initiating the BPF prog creation can be logged and later directly correlated to the unload event. The only info really needed from BPF side is the globally unique prog ID where then audit user space tooling can query / dump all info needed about the specific BPF program right upon load event and enrich the record, thus these changes needed here can be kept small and non-intrusive to the core. Raw example output: # auditctl -D # auditctl -a always,exit -F arch=x86_64 -S bpf # ausearch --start recent -m 1334 ... ---- time->Wed Nov 27 16:04:13 2019 type=PROCTITLE msg=audit(1574867053.120:84664): proctitle="./bpf" type=SYSCALL msg=audit(1574867053.120:84664): arch=c000003e syscall=321 \ success=yes exit=3 a0=5 a1=7ffea484fbe0 a2=70 a3=0 items=0 ppid=7477 \ pid=12698 auid=1001 uid=1001 gid=1001 euid=1001 suid=1001 fsuid=1001 \ egid=1001 sgid=1001 fsgid=1001 tty=pts2 ses=4 comm="bpf" \ exe="/home/jolsa/auditd/audit-testsuite/tests/bpf/bpf" \ subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=UNKNOWN[1334] msg=audit(1574867053.120:84664): prog-id=76 op=LOAD ---- time->Wed Nov 27 16:04:13 2019 type=UNKNOWN[1334] msg=audit(1574867053.120:84665): prog-id=76 op=UNLOAD ... Signed-off-by: Daniel Borkmann Co-developed-by: Jiri Olsa Signed-off-by: Jiri Olsa Acked-by: Paul Moore Link: https://lore.kernel.org/bpf/20191206214934.11319-1-jolsa@kernel.org --- include/uapi/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 3ad935527177..a534d71e689a 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -116,6 +116,7 @@ #define AUDIT_FANOTIFY 1331 /* Fanotify access decision */ #define AUDIT_TIME_INJOFFSET 1332 /* Timekeeping offset injected */ #define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */ +#define AUDIT_BPF 1334 /* BPF subsystem */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ -- cgit v1.2.3 From 98e8627efcada18ac043a77b9101b4b4c768090b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 13 Dec 2019 18:51:07 +0100 Subject: bpf: Move trampoline JIT image allocation to a function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor the image allocation in the BPF trampoline code into a separate function, so it can be shared with the BPF dispatcher in upcoming commits. Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191213175112.30208-2-bjorn.topel@gmail.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 35903f148be5..5d744828b399 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -475,6 +475,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); +void *bpf_jit_alloc_exec_page(void); #else static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { -- cgit v1.2.3 From 75ccbef6369e94ecac696a152a998a978d41376b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 13 Dec 2019 18:51:08 +0100 Subject: bpf: Introduce BPF dispatcher MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BPF dispatcher is a multi-way branch code generator, mainly targeted for XDP programs. When an XDP program is executed via the bpf_prog_run_xdp(), it is invoked via an indirect call. The indirect call has a substantial performance impact, when retpolines are enabled. The dispatcher transform indirect calls to direct calls, and therefore avoids the retpoline. The dispatcher is generated using the BPF JIT, and relies on text poking provided by bpf_arch_text_poke(). The dispatcher hijacks a trampoline function it via the __fentry__ nop of the trampoline. One dispatcher instance currently supports up to 64 dispatch points. A user creates a dispatcher with its corresponding trampoline with the DEFINE_BPF_DISPATCHER macro. Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191213175112.30208-3-bjorn.topel@gmail.com --- include/linux/bpf.h | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5d744828b399..53ae4a50abe4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -470,12 +470,61 @@ struct bpf_trampoline { void *image; u64 selector; }; + +#define BPF_DISPATCHER_MAX 64 /* Fits in 2048B */ + +struct bpf_dispatcher_prog { + struct bpf_prog *prog; + refcount_t users; +}; + +struct bpf_dispatcher { + /* dispatcher mutex */ + struct mutex mutex; + void *func; + struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX]; + int num_progs; + void *image; + u32 image_off; +}; + #ifdef CONFIG_BPF_JIT struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); void *bpf_jit_alloc_exec_page(void); +#define BPF_DISPATCHER_INIT(name) { \ + .mutex = __MUTEX_INITIALIZER(name.mutex), \ + .func = &name##func, \ + .progs = {}, \ + .num_progs = 0, \ + .image = NULL, \ + .image_off = 0 \ +} + +#define DEFINE_BPF_DISPATCHER(name) \ + noinline unsigned int name##func( \ + const void *ctx, \ + const struct bpf_insn *insnsi, \ + unsigned int (*bpf_func)(const void *, \ + const struct bpf_insn *)) \ + { \ + return bpf_func(ctx, insnsi); \ + } \ + EXPORT_SYMBOL(name##func); \ + struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name); +#define DECLARE_BPF_DISPATCHER(name) \ + unsigned int name##func( \ + const void *ctx, \ + const struct bpf_insn *insnsi, \ + unsigned int (*bpf_func)(const void *, \ + const struct bpf_insn *)); \ + extern struct bpf_dispatcher name; +#define BPF_DISPATCHER_FUNC(name) name##func +#define BPF_DISPATCHER_PTR(name) (&name) +void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, + struct bpf_prog *to); #else static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { @@ -490,6 +539,13 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) return -ENOTSUPP; } static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} +#define DEFINE_BPF_DISPATCHER(name) +#define DECLARE_BPF_DISPATCHER(name) +#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc +#define BPF_DISPATCHER_PTR(name) NULL +static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, + struct bpf_prog *from, + struct bpf_prog *to) {} #endif struct bpf_func_info_aux { -- cgit v1.2.3 From 7e6897f95935973c3253fd756135b5ea58043dc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 13 Dec 2019 18:51:09 +0100 Subject: bpf, xdp: Start using the BPF dispatcher for XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds a BPF dispatcher for XDP. The dispatcher is updated from the XDP control-path, dev_xdp_install(), and used when an XDP program is run via bpf_prog_run_xdp(). Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191213175112.30208-4-bjorn.topel@gmail.com --- include/linux/bpf.h | 15 +++++++++++++++ include/linux/filter.h | 40 ++++++++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 53ae4a50abe4..5970989b99d1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -488,6 +488,14 @@ struct bpf_dispatcher { u32 image_off; }; +static __always_inline unsigned int bpf_dispatcher_nopfunc( + const void *ctx, + const struct bpf_insn *insnsi, + unsigned int (*bpf_func)(const void *, + const struct bpf_insn *)) +{ + return bpf_func(ctx, insnsi); +} #ifdef CONFIG_BPF_JIT struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); @@ -997,6 +1005,8 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog); +struct bpf_prog *bpf_prog_by_id(u32 id); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1128,6 +1138,11 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, static inline void bpf_map_put(struct bpf_map *map) { } + +static inline struct bpf_prog *bpf_prog_by_id(u32 id) +{ + return ERR_PTR(-ENOTSUPP); +} #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, diff --git a/include/linux/filter.h b/include/linux/filter.h index a141cb07e76a..37ac7025031d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -559,23 +559,26 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); -#define BPF_PROG_RUN(prog, ctx) ({ \ - u32 ret; \ - cant_sleep(); \ - if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ - struct bpf_prog_stats *stats; \ - u64 start = sched_clock(); \ - ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \ - stats = this_cpu_ptr(prog->aux->stats); \ - u64_stats_update_begin(&stats->syncp); \ - stats->cnt++; \ - stats->nsecs += sched_clock() - start; \ - u64_stats_update_end(&stats->syncp); \ - } else { \ - ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \ - } \ +#define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \ + u32 ret; \ + cant_sleep(); \ + if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ + struct bpf_prog_stats *stats; \ + u64 start = sched_clock(); \ + ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + stats = this_cpu_ptr(prog->aux->stats); \ + u64_stats_update_begin(&stats->syncp); \ + stats->cnt++; \ + stats->nsecs += sched_clock() - start; \ + u64_stats_update_end(&stats->syncp); \ + } else { \ + ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + } \ ret; }) +#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx, \ + bpf_dispatcher_nopfunc) + #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN struct bpf_skb_data_end { @@ -699,6 +702,8 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, return res; } +DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp) + static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, struct xdp_buff *xdp) { @@ -708,9 +713,12 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, * already takes rcu_read_lock() when fetching the program, so * it's not necessary here anymore. */ - return BPF_PROG_RUN(prog, xdp); + return __BPF_PROG_RUN(prog, xdp, + BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp)); } +void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); + static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) { return prog->len * sizeof(struct bpf_insn); -- cgit v1.2.3 From 116eb788f57c9c35c40b29cfaa2607020de99a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 13 Dec 2019 18:51:12 +0100 Subject: bpf, x86: Align dispatcher branch targets to 16B MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit >From Intel 64 and IA-32 Architectures Optimization Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler Coding Rule 11: All branch targets should be 16-byte aligned. This commits aligns branch targets according to the Intel manual. The nops used to align branch targets make the dispatcher larger, and therefore the number of supported dispatch points/programs are descreased from 64 to 48. Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191213175112.30208-7-bjorn.topel@gmail.com --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5970989b99d1..d467983e61bb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -471,7 +471,7 @@ struct bpf_trampoline { u64 selector; }; -#define BPF_DISPATCHER_MAX 64 /* Fits in 2048B */ +#define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */ struct bpf_dispatcher_prog { struct bpf_prog *prog; -- cgit v1.2.3 From 166750bc1dd256b2184b22588fb9fe6d3fbb93ae Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 13 Dec 2019 17:47:08 -0800 Subject: libbpf: Support libbpf-provided extern variables Add support for extern variables, provided to BPF program by libbpf. Currently the following extern variables are supported: - LINUX_KERNEL_VERSION; version of a kernel in which BPF program is executing, follows KERNEL_VERSION() macro convention, can be 4- and 8-byte long; - CONFIG_xxx values; a set of values of actual kernel config. Tristate, boolean, strings, and integer values are supported. Set of possible values is determined by declared type of extern variable. Supported types of variables are: - Tristate values. Are represented as `enum libbpf_tristate`. Accepted values are **strictly** 'y', 'n', or 'm', which are represented as TRI_YES, TRI_NO, or TRI_MODULE, respectively. - Boolean values. Are represented as bool (_Bool) types. Accepted values are 'y' and 'n' only, turning into true/false values, respectively. - Single-character values. Can be used both as a substritute for bool/tristate, or as a small-range integer: - 'y'/'n'/'m' are represented as is, as characters 'y', 'n', or 'm'; - integers in a range [-128, 127] or [0, 255] (depending on signedness of char in target architecture) are recognized and represented with respective values of char type. - Strings. String values are declared as fixed-length char arrays. String of up to that length will be accepted and put in first N bytes of char array, with the rest of bytes zeroed out. If config string value is longer than space alloted, it will be truncated and warning message emitted. Char array is always zero terminated. String literals in config have to be enclosed in double quotes, just like C-style string literals. - Integers. 8-, 16-, 32-, and 64-bit integers are supported, both signed and unsigned variants. Libbpf enforces parsed config value to be in the supported range of corresponding integer type. Integers values in config can be: - decimal integers, with optional + and - signs; - hexadecimal integers, prefixed with 0x or 0X; - octal integers, starting with 0. Config file itself is searched in /boot/config-$(uname -r) location with fallback to /proc/config.gz, unless config path is specified explicitly through bpf_object_open_opts' kernel_config_path option. Both gzipped and plain text formats are supported. Libbpf adds explicit dependency on zlib because of this, but this shouldn't be a problem, given libelf already depends on zlib. All detected extern variables, are put into a separate .extern internal map. It, similarly to .rodata map, is marked as read-only from BPF program side, as well as is frozen on load. This allows BPF verifier to track extern values as constants and perform enhanced branch prediction and dead code elimination. This can be relied upon for doing kernel version/feature detection and using potentially unsupported field relocations or BPF helpers in a CO-RE-based BPF program, while still having a single version of BPF program running on old and new kernels. Selftests are validating this explicitly for unexisting BPF helper. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191214014710.3449601-3-andriin@fb.com --- include/uapi/linux/btf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index c02dec97e1ce..1a2898c482ee 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -142,7 +142,8 @@ struct btf_param { enum { BTF_VAR_STATIC = 0, - BTF_VAR_GLOBAL_ALLOCATED, + BTF_VAR_GLOBAL_ALLOCATED = 1, + BTF_VAR_GLOBAL_EXTERN = 2, }; /* BTF_KIND_VAR is followed by a single "struct btf_var" to describe -- cgit v1.2.3 From e312b9e706ed6d94f6cc9088fcd9fbd81de4525c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 19 Dec 2019 07:10:02 +0100 Subject: xsk: Make xskmap flush_list common for all map instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xskmap flush list is used to track entries that need to flushed from via the xdp_do_flush_map() function. This list used to be per-map, but there is really no reason for that. Instead make the flush list global for all xskmaps, which simplifies __xsk_map_flush() and xsk_map_alloc(). Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20191219061006.21980-5-bjorn.topel@gmail.com --- include/net/xdp_sock.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index e3780e4b74e1..48594740d67c 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -72,7 +72,6 @@ struct xdp_umem { struct xsk_map { struct bpf_map map; - struct list_head __percpu *flush_list; spinlock_t lock; /* Synchronize map updates */ struct xdp_sock *xsk_map[]; }; @@ -139,9 +138,8 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, struct xdp_sock **map_entry); int xsk_map_inc(struct xsk_map *map); void xsk_map_put(struct xsk_map *map); -int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, - struct xdp_sock *xs); -void __xsk_map_flush(struct bpf_map *map); +int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); +void __xsk_map_flush(void); static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key) @@ -369,13 +367,12 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle, return 0; } -static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, - struct xdp_sock *xs) +static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) { return -EOPNOTSUPP; } -static inline void __xsk_map_flush(struct bpf_map *map) +static inline void __xsk_map_flush(void) { } -- cgit v1.2.3 From 96360004b8628541f5d05a845ea213267db0b1a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 19 Dec 2019 07:10:03 +0100 Subject: xdp: Make devmap flush_list common for all map instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The devmap flush list is used to track entries that need to flushed from via the xdp_do_flush_map() function. This list used to be per-map, but there is really no reason for that. Instead make the flush list global for all devmaps, which simplifies __dev_map_flush() and dev_map_init_map(). Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20191219061006.21980-6-bjorn.topel@gmail.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d467983e61bb..31191804ca09 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -959,7 +959,7 @@ struct sk_buff; struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); -void __dev_map_flush(struct bpf_map *map); +void __dev_map_flush(void); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, @@ -1068,7 +1068,7 @@ static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map return NULL; } -static inline void __dev_map_flush(struct bpf_map *map) +static inline void __dev_map_flush(void) { } -- cgit v1.2.3 From cdfafe98cabefeedbbc65af5c191c59745c03298 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 19 Dec 2019 07:10:04 +0100 Subject: xdp: Make cpumap flush_list common for all map instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cpumap flush list is used to track entries that need to flushed from via the xdp_do_flush_map() function. This list used to be per-map, but there is really no reason for that. Instead make the flush list global for all devmaps, which simplifies __cpu_map_flush() and cpu_map_alloc(). Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20191219061006.21980-7-bjorn.topel@gmail.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 31191804ca09..8f3e00c84f39 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -966,7 +966,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); -void __cpu_map_flush(struct bpf_map *map); +void __cpu_map_flush(void); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -1097,7 +1097,7 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) return NULL; } -static inline void __cpu_map_flush(struct bpf_map *map) +static inline void __cpu_map_flush(void) { } -- cgit v1.2.3 From 332f22a60e4c3492d4953cd6f7aaa4e8bd0bba97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 19 Dec 2019 07:10:05 +0100 Subject: xdp: Remove map_to_flush and map swap detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that all XDP maps that can be used with bpf_redirect_map() tracks entries to be flushed in a global fashion, there is not need to track that the map has changed and flush from xdp_do_generic_map() anymore. All entries will be flushed in xdp_do_flush_map(). This means that the map_to_flush can be removed, and the corresponding checks. Moving the flush logic to one place, xdp_do_flush_map(), give a bulking behavior and performance boost. Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20191219061006.21980-8-bjorn.topel@gmail.com --- include/linux/filter.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 37ac7025031d..69d6706fc889 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -592,7 +592,6 @@ struct bpf_redirect_info { u32 tgt_index; void *tgt_value; struct bpf_map *map; - struct bpf_map *map_to_flush; u32 kern_flags; }; -- cgit v1.2.3 From 7dd68b3279f1792103d12e69933db3128c6d416e Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 18 Dec 2019 23:44:35 -0800 Subject: bpf: Support replacing cgroup-bpf program in MULTI mode The common use-case in production is to have multiple cgroup-bpf programs per attach type that cover multiple use-cases. Such programs are attached with BPF_F_ALLOW_MULTI and can be maintained by different people. Order of programs usually matters, for example imagine two egress programs: the first one drops packets and the second one counts packets. If they're swapped the result of counting program will be different. It brings operational challenges with updating cgroup-bpf program(s) attached with BPF_F_ALLOW_MULTI since there is no way to replace a program: * One way to update is to detach all programs first and then attach the new version(s) again in the right order. This introduces an interruption in the work a program is doing and may not be acceptable (e.g. if it's egress firewall); * Another way is attach the new version of a program first and only then detach the old version. This introduces the time interval when two versions of same program are working, what may not be acceptable if a program is not idempotent. It also imposes additional burden on program developers to make sure that two versions of their program can co-exist. Solve the problem by introducing a "replace" mode in BPF_PROG_ATTACH command for cgroup-bpf programs being attached with BPF_F_ALLOW_MULTI flag. This mode is enabled by newly introduced BPF_F_REPLACE attach flag and bpf_attr.replace_bpf_fd attribute to pass fd of the old program to replace That way user can replace any program among those attached with BPF_F_ALLOW_MULTI flag without the problems described above. Details of the new API: * If BPF_F_REPLACE is set but replace_bpf_fd doesn't have valid descriptor of BPF program, BPF_PROG_ATTACH will return corresponding error (EINVAL or EBADF). * If replace_bpf_fd has valid descriptor of BPF program but such a program is not attached to specified cgroup, BPF_PROG_ATTACH will return ENOENT. BPF_F_REPLACE is introduced to make the user intent clear, since replace_bpf_fd alone can't be used for this (its default value, 0, is a valid fd). BPF_F_REPLACE also makes it possible to extend the API in the future (e.g. add BPF_F_BEFORE and BPF_F_AFTER if needed). Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: Andrii Narkyiko Link: https://lore.kernel.org/bpf/30cd850044a0057bdfcaaf154b7d2f39850ba813.1576741281.git.rdna@fb.com --- include/linux/bpf-cgroup.h | 4 +++- include/uapi/linux/bpf.h | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 169fd25f6bc2..18f6a6da7c3c 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -85,6 +85,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp); void cgroup_bpf_offline(struct cgroup *cgrp); int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + struct bpf_prog *replace_prog, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); @@ -93,7 +94,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags); + struct bpf_prog *replace_prog, enum bpf_attach_type type, + u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index dbbcf0b02970..7df436da542d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -231,6 +231,11 @@ enum bpf_attach_type { * When children program makes decision (like picking TCP CA or sock bind) * parent program has a chance to override it. * + * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of + * programs for a cgroup. Though it's possible to replace an old program at + * any position by also specifying BPF_F_REPLACE flag and position itself in + * replace_bpf_fd attribute. Old program at this position will be released. + * * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. * A cgroup with NONE doesn't allow any programs in sub-cgroups. * Ex1: @@ -249,6 +254,7 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) #define BPF_F_ALLOW_MULTI (1U << 1) +#define BPF_F_REPLACE (1U << 2) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel @@ -442,6 +448,10 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; + __u32 replace_bpf_fd; /* previously attached eBPF + * program to replace if + * BPF_F_REPLACE is used + */ }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ -- cgit v1.2.3 From 03896ef1f0cb23d2742ddf486c531c700a2da7d6 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 19 Dec 2019 13:39:27 +0100 Subject: xsk: Change names of validation functions Change the names of the validation functions to better reflect what they are doing. The uppermost ones are reading entries from the rings and only the bottom ones validate entries. So xskq_cons_read_ is a better prefix name. Also change the xskq_cons_read_ functions to return a bool as the the descriptor or address is already returned by reference in the parameters. Everyone is using the return value as a bool anyway. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1576759171-28550-9-git-send-email-magnus.karlsson@intel.com --- include/net/xdp_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 48594740d67c..63f005830866 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -118,7 +118,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs); /* Used from netdev driver */ bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt); -u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr); +bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr); void xsk_umem_discard_addr(struct xdp_umem *umem); void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc); @@ -197,7 +197,7 @@ static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt) return xsk_umem_has_addrs(umem, cnt - rq->length); } -static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) +static inline bool xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) { struct xdp_umem_fq_reuse *rq = umem->fq_reuse; -- cgit v1.2.3 From f8509aa078de0842ec1817e8026e58620cd05d3b Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 19 Dec 2019 13:39:28 +0100 Subject: xsk: ixgbe: i40e: ice: mlx5: Xsk_umem_discard_addr to xsk_umem_release_addr Change the name of xsk_umem_discard_addr to xsk_umem_release_addr to better reflect the new naming of the AF_XDP queue manipulation functions. As this functions is used by drivers implementing support for AF_XDP zero-copy, it requires a name change to these drivers. The function xsk_umem_release_addr_rq has also changed name in the same fashion. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1576759171-28550-10-git-send-email-magnus.karlsson@intel.com --- include/net/xdp_sock.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 63f005830866..e86ec48ef627 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -119,7 +119,7 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs); /* Used from netdev driver */ bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt); bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr); -void xsk_umem_discard_addr(struct xdp_umem *umem); +void xsk_umem_release_addr(struct xdp_umem *umem); void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc); void xsk_umem_consume_tx_done(struct xdp_umem *umem); @@ -208,12 +208,12 @@ static inline bool xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) return addr; } -static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem) +static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem) { struct xdp_umem_fq_reuse *rq = umem->fq_reuse; if (!rq->length) - xsk_umem_discard_addr(umem); + xsk_umem_release_addr(umem); else rq->length--; } @@ -258,7 +258,7 @@ static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) return NULL; } -static inline void xsk_umem_discard_addr(struct xdp_umem *umem) +static inline void xsk_umem_release_addr(struct xdp_umem *umem) { } @@ -332,7 +332,7 @@ static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) return NULL; } -static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem) +static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem) { } -- cgit v1.2.3