diff options
Diffstat (limited to 'kernel/bpf')
-rw-r--r-- | kernel/bpf/Makefile | 1 | ||||
-rw-r--r-- | kernel/bpf/arraymap.c | 30 | ||||
-rw-r--r-- | kernel/bpf/bpf_iter.c | 14 | ||||
-rw-r--r-- | kernel/bpf/bpf_local_storage.c | 20 | ||||
-rw-r--r-- | kernel/bpf/bpf_lsm.c | 140 | ||||
-rw-r--r-- | kernel/bpf/bpf_struct_ops.c | 19 | ||||
-rw-r--r-- | kernel/bpf/bpf_task_storage.c | 315 | ||||
-rw-r--r-- | kernel/bpf/btf.c | 481 | ||||
-rw-r--r-- | kernel/bpf/core.c | 23 | ||||
-rw-r--r-- | kernel/bpf/cpumap.c | 37 | ||||
-rw-r--r-- | kernel/bpf/devmap.c | 25 | ||||
-rw-r--r-- | kernel/bpf/hashtab.c | 190 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 17 | ||||
-rw-r--r-- | kernel/bpf/local_storage.c | 44 | ||||
-rw-r--r-- | kernel/bpf/lpm_trie.c | 19 | ||||
-rw-r--r-- | kernel/bpf/queue_stack_maps.c | 16 | ||||
-rw-r--r-- | kernel/bpf/reuseport_array.c | 12 | ||||
-rw-r--r-- | kernel/bpf/ringbuf.c | 35 | ||||
-rw-r--r-- | kernel/bpf/stackmap.c | 18 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 337 | ||||
-rw-r--r-- | kernel/bpf/sysfs_btf.c | 2 | ||||
-rw-r--r-- | kernel/bpf/task_iter.c | 87 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 451 |
23 files changed, 1554 insertions, 779 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index c1b9f71ee6aa..d1249340fd6b 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o +obj-${CONFIG_BPF_LSM} += bpf_task_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o obj-$(CONFIG_BPF_JIT) += trampoline.o obj-$(CONFIG_BPF_SYSCALL) += btf.o diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index c6c81eceb68f..1f8453343bf2 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -34,8 +34,8 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) int i; for (i = 0; i < array->map.max_entries; i++) { - ptr = __alloc_percpu_gfp(array->elem_size, 8, - GFP_USER | __GFP_NOWARN); + ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8, + GFP_USER | __GFP_NOWARN); if (!ptr) { bpf_array_free_percpu(array); return -ENOMEM; @@ -81,11 +81,10 @@ int array_map_alloc_check(union bpf_attr *attr) static struct bpf_map *array_map_alloc(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; - int ret, numa_node = bpf_map_attr_numa_node(attr); + int numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; bool bypass_spec_v1 = bpf_bypass_spec_v1(); - u64 cost, array_size, mask64; - struct bpf_map_memory mem; + u64 array_size, mask64; struct bpf_array *array; elem_size = round_up(attr->value_size, 8); @@ -126,44 +125,29 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) } } - /* make sure there is no u32 overflow later in round_up() */ - cost = array_size; - if (percpu) - cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); - - ret = bpf_map_charge_init(&mem, cost); - if (ret < 0) - return ERR_PTR(ret); - /* allocate all map elements and zero-initialize them */ if (attr->map_flags & BPF_F_MMAPABLE) { void *data; /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */ data = bpf_map_area_mmapable_alloc(array_size, numa_node); - if (!data) { - bpf_map_charge_finish(&mem); + if (!data) return ERR_PTR(-ENOMEM); - } array = data + PAGE_ALIGN(sizeof(struct bpf_array)) - offsetof(struct bpf_array, value); } else { array = bpf_map_area_alloc(array_size, numa_node); } - if (!array) { - bpf_map_charge_finish(&mem); + if (!array) return ERR_PTR(-ENOMEM); - } array->index_mask = index_mask; array->map.bypass_spec_v1 = bypass_spec_v1; /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); - bpf_map_charge_move(&array->map.memory, &mem); array->elem_size = elem_size; if (percpu && bpf_array_alloc_percpu(array)) { - bpf_map_charge_finish(&array->map.memory); bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } @@ -1018,7 +1002,7 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) struct bpf_array_aux *aux; struct bpf_map *map; - aux = kzalloc(sizeof(*aux), GFP_KERNEL); + aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT); if (!aux) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 8f10e30ea0b0..5454161407f1 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -67,6 +67,15 @@ static void bpf_iter_done_stop(struct seq_file *seq) iter_priv->done_stop = true; } +static bool bpf_iter_support_resched(struct seq_file *seq) +{ + struct bpf_iter_priv_data *iter_priv; + + iter_priv = container_of(seq->private, struct bpf_iter_priv_data, + target_private); + return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED; +} + /* maximum visited objects before bailing out */ #define MAX_ITER_OBJECTS 1000000 @@ -83,6 +92,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, struct seq_file *seq = file->private_data; size_t n, offs, copied = 0; int err = 0, num_objs = 0; + bool can_resched; void *p; mutex_lock(&seq->lock); @@ -135,6 +145,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, goto done; } + can_resched = bpf_iter_support_resched(seq); while (1) { loff_t pos = seq->index; @@ -180,6 +191,9 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, } break; } + + if (can_resched) + cond_resched(); } stop: offs = seq->count; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 5d3a7af9ba9b..dd5aedee99e7 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -67,7 +67,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, if (charge_mem && mem_charge(smap, owner, smap->elem_size)) return NULL; - selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN); + selem = bpf_map_kzalloc(&smap->map, smap->elem_size, + GFP_ATOMIC | __GFP_NOWARN); if (selem) { if (value) memcpy(SDATA(selem)->data, value, smap->map.value_size); @@ -264,7 +265,8 @@ int bpf_local_storage_alloc(void *owner, if (err) return err; - storage = kzalloc(sizeof(*storage), GFP_ATOMIC | __GFP_NOWARN); + storage = bpf_map_kzalloc(&smap->map, sizeof(*storage), + GFP_ATOMIC | __GFP_NOWARN); if (!storage) { err = -ENOMEM; goto uncharge; @@ -543,10 +545,8 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr) struct bpf_local_storage_map *smap; unsigned int i; u32 nbuckets; - u64 cost; - int ret; - smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN); + smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); if (!smap) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&smap->map, attr); @@ -555,18 +555,10 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr) /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */ nbuckets = max_t(u32, 2, nbuckets); smap->bucket_log = ilog2(nbuckets); - cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap); - - ret = bpf_map_charge_init(&smap->map.memory, cost); - if (ret < 0) { - kfree(smap); - return ERR_PTR(ret); - } smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets, - GFP_USER | __GFP_NOWARN); + GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); if (!smap->buckets) { - bpf_map_charge_finish(&smap->map.memory); kfree(smap); return ERR_PTR(-ENOMEM); } diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 56cc5a915f67..70e5e0b6d69d 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -7,6 +7,7 @@ #include <linux/filter.h> #include <linux/bpf.h> #include <linux/btf.h> +#include <linux/binfmts.h> #include <linux/lsm_hooks.h> #include <linux/bpf_lsm.h> #include <linux/kallsyms.h> @@ -14,6 +15,7 @@ #include <net/bpf_sk_storage.h> #include <linux/bpf_local_storage.h> #include <linux/btf_ids.h> +#include <linux/ima.h> /* For every LSM hook that allows attachment of BPF programs, declare a nop * function where a BPF program can be attached. @@ -51,6 +53,52 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, return 0; } +/* Mask for all the currently supported BPRM option flags */ +#define BPF_F_BRPM_OPTS_MASK BPF_F_BPRM_SECUREEXEC + +BPF_CALL_2(bpf_bprm_opts_set, struct linux_binprm *, bprm, u64, flags) +{ + if (flags & ~BPF_F_BRPM_OPTS_MASK) + return -EINVAL; + + bprm->secureexec = (flags & BPF_F_BPRM_SECUREEXEC); + return 0; +} + +BTF_ID_LIST_SINGLE(bpf_bprm_opts_set_btf_ids, struct, linux_binprm) + +const static struct bpf_func_proto bpf_bprm_opts_set_proto = { + .func = bpf_bprm_opts_set, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_bprm_opts_set_btf_ids[0], + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_3(bpf_ima_inode_hash, struct inode *, inode, void *, dst, u32, size) +{ + return ima_inode_hash(inode, dst, size); +} + +static bool bpf_ima_inode_hash_allowed(const struct bpf_prog *prog) +{ + return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id); +} + +BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode) + +const static struct bpf_func_proto bpf_ima_inode_hash_proto = { + .func = bpf_ima_inode_hash, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_ima_inode_hash_btf_ids[0], + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, + .allowed = bpf_ima_inode_hash_allowed, +}; + static const struct bpf_func_proto * bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -63,11 +111,103 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; + case BPF_FUNC_task_storage_get: + return &bpf_task_storage_get_proto; + case BPF_FUNC_task_storage_delete: + return &bpf_task_storage_delete_proto; + case BPF_FUNC_bprm_opts_set: + return &bpf_bprm_opts_set_proto; + case BPF_FUNC_ima_inode_hash: + return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL; default: return tracing_prog_func_proto(func_id, prog); } } +/* The set of hooks which are called without pagefaults disabled and are allowed + * to "sleep" and thus can be used for sleeable BPF programs. + */ +BTF_SET_START(sleepable_lsm_hooks) +BTF_ID(func, bpf_lsm_bpf) +BTF_ID(func, bpf_lsm_bpf_map) +BTF_ID(func, bpf_lsm_bpf_map_alloc_security) +BTF_ID(func, bpf_lsm_bpf_map_free_security) +BTF_ID(func, bpf_lsm_bpf_prog) +BTF_ID(func, bpf_lsm_bprm_check_security) +BTF_ID(func, bpf_lsm_bprm_committed_creds) +BTF_ID(func, bpf_lsm_bprm_committing_creds) +BTF_ID(func, bpf_lsm_bprm_creds_for_exec) +BTF_ID(func, bpf_lsm_bprm_creds_from_file) +BTF_ID(func, bpf_lsm_capget) +BTF_ID(func, bpf_lsm_capset) +BTF_ID(func, bpf_lsm_cred_prepare) +BTF_ID(func, bpf_lsm_file_ioctl) +BTF_ID(func, bpf_lsm_file_lock) +BTF_ID(func, bpf_lsm_file_open) +BTF_ID(func, bpf_lsm_file_receive) +BTF_ID(func, bpf_lsm_inet_conn_established) +BTF_ID(func, bpf_lsm_inode_create) +BTF_ID(func, bpf_lsm_inode_free_security) +BTF_ID(func, bpf_lsm_inode_getattr) +BTF_ID(func, bpf_lsm_inode_getxattr) +BTF_ID(func, bpf_lsm_inode_mknod) +BTF_ID(func, bpf_lsm_inode_need_killpriv) +BTF_ID(func, bpf_lsm_inode_post_setxattr) +BTF_ID(func, bpf_lsm_inode_readlink) +BTF_ID(func, bpf_lsm_inode_rename) +BTF_ID(func, bpf_lsm_inode_rmdir) +BTF_ID(func, bpf_lsm_inode_setattr) +BTF_ID(func, bpf_lsm_inode_setxattr) +BTF_ID(func, bpf_lsm_inode_symlink) +BTF_ID(func, bpf_lsm_inode_unlink) +BTF_ID(func, bpf_lsm_kernel_module_request) +BTF_ID(func, bpf_lsm_kernfs_init_security) +BTF_ID(func, bpf_lsm_key_free) +BTF_ID(func, bpf_lsm_mmap_file) +BTF_ID(func, bpf_lsm_netlink_send) +BTF_ID(func, bpf_lsm_path_notify) +BTF_ID(func, bpf_lsm_release_secctx) +BTF_ID(func, bpf_lsm_sb_alloc_security) +BTF_ID(func, bpf_lsm_sb_eat_lsm_opts) +BTF_ID(func, bpf_lsm_sb_kern_mount) +BTF_ID(func, bpf_lsm_sb_mount) +BTF_ID(func, bpf_lsm_sb_remount) +BTF_ID(func, bpf_lsm_sb_set_mnt_opts) +BTF_ID(func, bpf_lsm_sb_show_options) +BTF_ID(func, bpf_lsm_sb_statfs) +BTF_ID(func, bpf_lsm_sb_umount) +BTF_ID(func, bpf_lsm_settime) +BTF_ID(func, bpf_lsm_socket_accept) +BTF_ID(func, bpf_lsm_socket_bind) +BTF_ID(func, bpf_lsm_socket_connect) +BTF_ID(func, bpf_lsm_socket_create) +BTF_ID(func, bpf_lsm_socket_getpeername) +BTF_ID(func, bpf_lsm_socket_getpeersec_dgram) +BTF_ID(func, bpf_lsm_socket_getsockname) +BTF_ID(func, bpf_lsm_socket_getsockopt) +BTF_ID(func, bpf_lsm_socket_listen) +BTF_ID(func, bpf_lsm_socket_post_create) +BTF_ID(func, bpf_lsm_socket_recvmsg) +BTF_ID(func, bpf_lsm_socket_sendmsg) +BTF_ID(func, bpf_lsm_socket_shutdown) +BTF_ID(func, bpf_lsm_socket_socketpair) +BTF_ID(func, bpf_lsm_syslog) +BTF_ID(func, bpf_lsm_task_alloc) +BTF_ID(func, bpf_lsm_task_getsecid) +BTF_ID(func, bpf_lsm_task_prctl) +BTF_ID(func, bpf_lsm_task_setscheduler) +BTF_ID(func, bpf_lsm_task_to_inode) +BTF_SET_END(sleepable_lsm_hooks) + +bool bpf_lsm_is_sleepable_hook(u32 btf_id) +{ + return btf_id_set_contains(&sleepable_lsm_hooks, btf_id); +} + const struct bpf_prog_ops lsm_prog_ops = { }; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 4c3b543bb33b..1a666a975416 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -548,12 +548,10 @@ static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr) static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) { const struct bpf_struct_ops *st_ops; - size_t map_total_size, st_map_size; + size_t st_map_size; struct bpf_struct_ops_map *st_map; const struct btf_type *t, *vt; - struct bpf_map_memory mem; struct bpf_map *map; - int err; if (!bpf_capable()) return ERR_PTR(-EPERM); @@ -573,20 +571,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) * struct bpf_struct_ops_tcp_congestions_ops */ (vt->size - sizeof(struct bpf_struct_ops_value)); - map_total_size = st_map_size + - /* uvalue */ - sizeof(vt->size) + - /* struct bpf_progs **progs */ - btf_type_vlen(t) * sizeof(struct bpf_prog *); - err = bpf_map_charge_init(&mem, map_total_size); - if (err < 0) - return ERR_PTR(err); st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE); - if (!st_map) { - bpf_map_charge_finish(&mem); + if (!st_map) return ERR_PTR(-ENOMEM); - } + st_map->st_ops = st_ops; map = &st_map->map; @@ -597,14 +586,12 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); if (!st_map->uvalue || !st_map->progs || !st_map->image) { bpf_struct_ops_map_free(map); - bpf_map_charge_finish(&mem); return ERR_PTR(-ENOMEM); } mutex_init(&st_map->lock); set_vm_flush_reset_perms(st_map->image); bpf_map_init_from_attr(map, attr); - bpf_map_charge_move(&map->memory, &mem); return map; } diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c new file mode 100644 index 000000000000..4ef1959a78f2 --- /dev/null +++ b/kernel/bpf/bpf_task_storage.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Facebook + * Copyright 2020 Google LLC. + */ + +#include <linux/pid.h> +#include <linux/sched.h> +#include <linux/rculist.h> +#include <linux/list.h> +#include <linux/hash.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/bpf.h> +#include <linux/bpf_local_storage.h> +#include <linux/filter.h> +#include <uapi/linux/btf.h> +#include <linux/bpf_lsm.h> +#include <linux/btf_ids.h> +#include <linux/fdtable.h> + +DEFINE_BPF_STORAGE_CACHE(task_cache); + +static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) +{ + struct task_struct *task = owner; + struct bpf_storage_blob *bsb; + + bsb = bpf_task(task); + if (!bsb) + return NULL; + return &bsb->storage; +} + +static struct bpf_local_storage_data * +task_storage_lookup(struct task_struct *task, struct bpf_map *map, + bool cacheit_lockit) +{ + struct bpf_local_storage *task_storage; + struct bpf_local_storage_map *smap; + struct bpf_storage_blob *bsb; + + bsb = bpf_task(task); + if (!bsb) + return NULL; + + task_storage = rcu_dereference(bsb->storage); + if (!task_storage) + return NULL; + + smap = (struct bpf_local_storage_map *)map; + return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit); +} + +void bpf_task_storage_free(struct task_struct *task) +{ + struct bpf_local_storage_elem *selem; + struct bpf_local_storage *local_storage; + bool free_task_storage = false; + struct bpf_storage_blob *bsb; + struct hlist_node *n; + + bsb = bpf_task(task); + if (!bsb) + return; + + rcu_read_lock(); + + local_storage = rcu_dereference(bsb->storage); + if (!local_storage) { + rcu_read_unlock(); + return; + } + + /* Neither the bpf_prog nor the bpf-map's syscall + * could be modifying the local_storage->list now. + * Thus, no elem can be added-to or deleted-from the + * local_storage->list by the bpf_prog or by the bpf-map's syscall. + * + * It is racing with bpf_local_storage_map_free() alone + * when unlinking elem from the local_storage->list and + * the map's bucket->list. + */ + raw_spin_lock_bh(&local_storage->lock); + hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { + /* Always unlink from map before unlinking from + * local_storage. + */ + bpf_selem_unlink_map(selem); + free_task_storage = bpf_selem_unlink_storage_nolock( + local_storage, selem, false); + } + raw_spin_unlock_bh(&local_storage->lock); + rcu_read_unlock(); + + /* free_task_storage should always be true as long as + * local_storage->list was non-empty. + */ + if (free_task_storage) + kfree_rcu(local_storage, rcu); +} + +static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_local_storage_data *sdata; + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return ERR_CAST(pid); + + /* We should be in an RCU read side critical section, it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task) { + err = -ENOENT; + goto out; + } + + sdata = task_storage_lookup(task, map, true); + put_pid(pid); + return sdata ? sdata->data : NULL; +out: + put_pid(pid); + return ERR_PTR(err); +} + +static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_local_storage_data *sdata; + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + /* We should be in an RCU read side critical section, it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task || !task_storage_ptr(task)) { + err = -ENOENT; + goto out; + } + + sdata = bpf_local_storage_update( + task, (struct bpf_local_storage_map *)map, value, map_flags); + + err = PTR_ERR_OR_ZERO(sdata); +out: + put_pid(pid); + return err; +} + +static int task_storage_delete(struct task_struct *task, struct bpf_map *map) +{ + struct bpf_local_storage_data *sdata; + + sdata = task_storage_lookup(task, map, false); + if (!sdata) + return -ENOENT; + + bpf_selem_unlink(SELEM(sdata)); + + return 0; +} + +static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) +{ + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + /* We should be in an RCU read side critical section, it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task) { + err = -ENOENT; + goto out; + } + + err = task_storage_delete(task, map); +out: + put_pid(pid); + return err; +} + +BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, + task, void *, value, u64, flags) +{ + struct bpf_local_storage_data *sdata; + + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) + return (unsigned long)NULL; + + /* explicitly check that the task_storage_ptr is not + * NULL as task_storage_lookup returns NULL in this case and + * bpf_local_storage_update expects the owner to have a + * valid storage pointer. + */ + if (!task_storage_ptr(task)) + return (unsigned long)NULL; + + sdata = task_storage_lookup(task, map, true); + if (sdata) + return (unsigned long)sdata->data; + + /* This helper must only be called from places where the lifetime of the task + * is guaranteed. Either by being refcounted or by being protected + * by an RCU read-side critical section. + */ + if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { + sdata = bpf_local_storage_update( + task, (struct bpf_local_storage_map *)map, value, + BPF_NOEXIST); + return IS_ERR(sdata) ? (unsigned long)NULL : + (unsigned long)sdata->data; + } + + return (unsigned long)NULL; +} + +BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, + task) +{ + /* This helper must only be called from places where the lifetime of the task + * is guaranteed. Either by being refcounted or by being protected + * by an RCU read-side critical section. + */ + return task_storage_delete(task, map); +} + +static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -ENOTSUPP; +} + +static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) +{ + struct bpf_local_storage_map *smap; + + smap = bpf_local_storage_map_alloc(attr); + if (IS_ERR(smap)) + return ERR_CAST(smap); + + smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache); + return &smap->map; +} + +static void task_storage_map_free(struct bpf_map *map) +{ + struct bpf_local_storage_map *smap; + + smap = (struct bpf_local_storage_map *)map; + bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx); + bpf_local_storage_map_free(smap); +} + +static int task_storage_map_btf_id; +const struct bpf_map_ops task_storage_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = bpf_local_storage_map_alloc_check, + .map_alloc = task_storage_map_alloc, + .map_free = task_storage_map_free, + .map_get_next_key = notsupp_get_next_key, + .map_lookup_elem = bpf_pid_task_storage_lookup_elem, + .map_update_elem = bpf_pid_task_storage_update_elem, + .map_delete_elem = bpf_pid_task_storage_delete_elem, + .map_check_btf = bpf_local_storage_map_check_btf, + .map_btf_name = "bpf_local_storage_map", + .map_btf_id = &task_storage_map_btf_id, + .map_owner_storage_ptr = task_storage_ptr, +}; + +BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct) + +const struct bpf_func_proto bpf_task_storage_get_proto = { + .func = bpf_task_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_task_storage_btf_ids[0], + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + +const struct bpf_func_proto bpf_task_storage_delete_proto = { + .func = bpf_task_storage_delete, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_task_storage_btf_ids[0], +}; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index ed7d02e8bc93..8d6bdb4f4d61 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -22,7 +22,8 @@ #include <linux/skmsg.h> #include <linux/perf_event.h> #include <linux/bsearch.h> -#include <linux/btf_ids.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> #include <net/sock.h> /* BTF (BPF Type Format) is the meta data format which describes @@ -204,12 +205,19 @@ struct btf { const char *strings; void *nohdr_data; struct btf_header hdr; - u32 nr_types; + u32 nr_types; /* includes VOID for base BTF */ u32 types_size; u32 data_size; refcount_t refcnt; u32 id; struct rcu_head rcu; + + /* split BTF support */ + struct btf *base_btf; + u32 start_id; /* first type ID in this BTF (0 for base BTF) */ + u32 start_str_off; /* first string offset (0 for base BTF) */ + char name[MODULE_NAME_LEN]; + bool kernel_btf; }; enum verifier_phase { @@ -450,14 +458,27 @@ static bool btf_type_is_datasec(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; } +static u32 btf_nr_types_total(const struct btf *btf) +{ + u32 total = 0; + + while (btf) { + total += btf->nr_types; + btf = btf->base_btf; + } + + return total; +} + s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) { const struct btf_type *t; const char *tname; - u32 i; + u32 i, total; - for (i = 1; i <= btf->nr_types; i++) { - t = btf->types[i]; + total = btf_nr_types_total(btf); + for (i = 1; i < total; i++) { + t = btf_type_by_id(btf, i); if (BTF_INFO_KIND(t->info) != kind) continue; @@ -600,8 +621,14 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) static bool btf_name_offset_valid(const struct btf *btf, u32 offset) { - return BTF_STR_OFFSET_VALID(offset) && - offset < btf->hdr.str_len; + if (!BTF_STR_OFFSET_VALID(offset)) + return false; + + while (offset < btf->start_str_off) + btf = btf->base_btf; + + offset -= btf->start_str_off; + return offset < btf->hdr.str_len; } static bool __btf_name_char_ok(char c, bool first, bool dot_ok) @@ -615,10 +642,22 @@ static bool __btf_name_char_ok(char c, bool first, bool dot_ok) return true; } +static const char *btf_str_by_offset(const struct btf *btf, u32 offset) +{ + while (offset < btf->start_str_off) + btf = btf->base_btf; + + offset -= btf->start_str_off; + if (offset < btf->hdr.str_len) + return &btf->strings[offset]; + + return NULL; +} + static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok) { /* offset must be valid */ - const char *src = &btf->strings[offset]; + const char *src = btf_str_by_offset(btf, offset); const char *src_limit; if (!__btf_name_char_ok(*src, true, dot_ok)) @@ -651,27 +690,28 @@ static bool btf_name_valid_section(const struct btf *btf, u32 offset) static const char *__btf_name_by_offset(const struct btf *btf, u32 offset) { + const char *name; + if (!offset) return "(anon)"; - else if (offset < btf->hdr.str_len) - return &btf->strings[offset]; - else - return "(invalid-name-offset)"; + + name = btf_str_by_offset(btf, offset); + return name ?: "(invalid-name-offset)"; } const char *btf_name_by_offset(const struct btf *btf, u32 offset) { - if (offset < btf->hdr.str_len) - return &btf->strings[offset]; - - return NULL; + return btf_str_by_offset(btf, offset); } const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) { - if (type_id > btf->nr_types) - return NULL; + while (type_id < btf->start_id) + btf = btf->base_btf; + type_id -= btf->start_id; + if (type_id >= btf->nr_types) + return NULL; return btf->types[type_id]; } @@ -1391,17 +1431,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) { struct btf *btf = env->btf; - /* < 2 because +1 for btf_void which is always in btf->types[0]. - * btf_void is not accounted in btf->nr_types because btf_void - * does not come from the BTF file. - */ - if (btf->types_size - btf->nr_types < 2) { + if (btf->types_size == btf->nr_types) { /* Expand 'types' array */ struct btf_type **new_types; u32 expand_by, new_size; - if (btf->types_size == BTF_MAX_TYPE) { + if (btf->start_id + btf->types_size == BTF_MAX_TYPE) { btf_verifier_log(env, "Exceeded max num of types"); return -E2BIG; } @@ -1415,18 +1451,23 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) if (!new_types) return -ENOMEM; - if (btf->nr_types == 0) - new_types[0] = &btf_void; - else + if (btf->nr_types == 0) { + if (!btf->base_btf) { + /* lazily init VOID type */ + new_types[0] = &btf_void; + btf->nr_types++; + } + } else { memcpy(new_types, btf->types, - sizeof(*btf->types) * (btf->nr_types + 1)); + sizeof(*btf->types) * btf->nr_types); + } kvfree(btf->types); btf->types = new_types; btf->types_size = new_size; } - btf->types[++(btf->nr_types)] = t; + btf->types[btf->nr_types++] = t; return 0; } @@ -1483,6 +1524,11 @@ static void btf_free_rcu(struct rcu_head *rcu) btf_free(btf); } +void btf_get(struct btf *btf) +{ + refcount_inc(&btf->refcnt); +} + void btf_put(struct btf *btf) { if (btf && refcount_dec_and_test(&btf->refcnt)) { @@ -1499,18 +1545,17 @@ static int env_resolve_init(struct btf_verifier_env *env) u32 *resolved_ids = NULL; u8 *visit_states = NULL; - /* +1 for btf_void */ - resolved_sizes = kvcalloc(nr_types + 1, sizeof(*resolved_sizes), + resolved_sizes = kvcalloc(nr_types, sizeof(*resolved_sizes), GFP_KERNEL | __GFP_NOWARN); if (!resolved_sizes) goto nomem; - resolved_ids = kvca |