summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/Makefile1
-rw-r--r--kernel/bpf/arraymap.c263
-rw-r--r--kernel/bpf/btf.c796
-rw-r--r--kernel/bpf/core.c129
-rw-r--r--kernel/bpf/devmap.c74
-rw-r--r--kernel/bpf/inode.c7
-rw-r--r--kernel/bpf/map_in_map.c7
-rw-r--r--kernel/bpf/stackmap.c7
-rw-r--r--kernel/bpf/syscall.c382
-rw-r--r--kernel/bpf/trampoline.c253
-rw-r--r--kernel/bpf/verifier.c543
-rw-r--r--kernel/bpf/xskmap.c118
-rw-r--r--kernel/events/core.c7
-rw-r--r--kernel/extable.c2
-rw-r--r--kernel/trace/bpf_trace.c227
15 files changed, 2446 insertions, 370 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e1d9adb212f9..3f671bf617e8 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
+obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1c65ce0098a9..f0d19bbb9211 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -14,7 +14,7 @@
#include "map_in_map.h"
#define ARRAY_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
+ (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK)
static void bpf_array_free_percpu(struct bpf_array *array)
{
@@ -59,6 +59,10 @@ int array_map_alloc_check(union bpf_attr *attr)
(percpu && numa_node != NUMA_NO_NODE))
return -EINVAL;
+ if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
+ attr->map_flags & BPF_F_MMAPABLE)
+ return -EINVAL;
+
if (attr->value_size > KMALLOC_MAX_SIZE)
/* if value_size is bigger, the user space won't be able to
* access the elements.
@@ -102,10 +106,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
}
array_size = sizeof(*array);
- if (percpu)
+ if (percpu) {
array_size += (u64) max_entries * sizeof(void *);
- else
- array_size += (u64) max_entries * elem_size;
+ } else {
+ /* rely on vmalloc() to return page-aligned memory and
+ * ensure array->value is exactly page-aligned
+ */
+ if (attr->map_flags & BPF_F_MMAPABLE) {
+ array_size = PAGE_ALIGN(array_size);
+ array_size += PAGE_ALIGN((u64) max_entries * elem_size);
+ } else {
+ array_size += (u64) max_entries * elem_size;
+ }
+ }
/* make sure there is no u32 overflow later in round_up() */
cost = array_size;
@@ -117,7 +130,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
return ERR_PTR(ret);
/* allocate all map elements and zero-initialize them */
- array = bpf_map_area_alloc(array_size, numa_node);
+ if (attr->map_flags & BPF_F_MMAPABLE) {
+ void *data;
+
+ /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
+ data = bpf_map_area_mmapable_alloc(array_size, numa_node);
+ if (!data) {
+ bpf_map_charge_finish(&mem);
+ return ERR_PTR(-ENOMEM);
+ }
+ array = data + PAGE_ALIGN(sizeof(struct bpf_array))
+ - offsetof(struct bpf_array, value);
+ } else {
+ array = bpf_map_area_alloc(array_size, numa_node);
+ }
if (!array) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
@@ -350,6 +376,11 @@ static int array_map_delete_elem(struct bpf_map *map, void *key)
return -EINVAL;
}
+static void *array_map_vmalloc_addr(struct bpf_array *array)
+{
+ return (void *)round_down((unsigned long)array, PAGE_SIZE);
+}
+
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
@@ -365,7 +396,10 @@ static void array_map_free(struct bpf_map *map)
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
bpf_array_free_percpu(array);
- bpf_map_area_free(array);
+ if (array->map.map_flags & BPF_F_MMAPABLE)
+ bpf_map_area_free(array_map_vmalloc_addr(array));
+ else
+ bpf_map_area_free(array);
}
static void array_map_seq_show_elem(struct bpf_map *map, void *key,
@@ -444,6 +478,17 @@ static int array_map_check_btf(const struct bpf_map *map,
return 0;
}
+static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;
+
+ if (!(map->map_flags & BPF_F_MMAPABLE))
+ return -EINVAL;
+
+ return remap_vmalloc_range(vma, array_map_vmalloc_addr(array), pgoff);
+}
+
const struct bpf_map_ops array_map_ops = {
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
@@ -455,6 +500,7 @@ const struct bpf_map_ops array_map_ops = {
.map_gen_lookup = array_map_gen_lookup,
.map_direct_value_addr = array_map_direct_value_addr,
.map_direct_value_meta = array_map_direct_value_meta,
+ .map_mmap = array_map_mmap,
.map_seq_show_elem = array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
};
@@ -540,10 +586,17 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
if (IS_ERR(new_ptr))
return PTR_ERR(new_ptr);
- old_ptr = xchg(array->ptrs + index, new_ptr);
+ if (map->ops->map_poke_run) {
+ mutex_lock(&array->aux->poke_mutex);
+ old_ptr = xchg(array->ptrs + index, new_ptr);
+ map->ops->map_poke_run(map, index, old_ptr, new_ptr);
+ mutex_unlock(&array->aux->poke_mutex);
+ } else {
+ old_ptr = xchg(array->ptrs + index, new_ptr);
+ }
+
if (old_ptr)
map->ops->map_fd_put_ptr(old_ptr);
-
return 0;
}
@@ -556,7 +609,15 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
if (index >= array->map.max_entries)
return -E2BIG;
- old_ptr = xchg(array->ptrs + index, NULL);
+ if (map->ops->map_poke_run) {
+ mutex_lock(&array->aux->poke_mutex);
+ old_ptr = xchg(array->ptrs + index, NULL);
+ map->ops->map_poke_run(map, index, old_ptr, NULL);
+ mutex_unlock(&array->aux->poke_mutex);
+ } else {
+ old_ptr = xchg(array->ptrs + index, NULL);
+ }
+
if (old_ptr) {
map->ops->map_fd_put_ptr(old_ptr);
return 0;
@@ -625,17 +686,195 @@ static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
rcu_read_unlock();
}
+struct prog_poke_elem {
+ struct list_head list;
+ struct bpf_prog_aux *aux;
+};
+
+static int prog_array_map_poke_track(struct bpf_map *map,
+ struct bpf_prog_aux *prog_aux)
+{
+ struct prog_poke_elem *elem;
+ struct bpf_array_aux *aux;
+ int ret = 0;
+
+ aux = container_of(map, struct bpf_array, map)->aux;
+ mutex_lock(&aux->poke_mutex);
+ list_for_each_entry(elem, &aux->poke_progs, list) {
+ if (elem->aux == prog_aux)
+ goto out;
+ }
+
+ elem = kmalloc(sizeof(*elem), GFP_KERNEL);
+ if (!elem) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&elem->list);
+ /* We must track the program's aux info at this point in time
+ * since the program pointer itself may not be stable yet, see
+ * also comment in prog_array_map_poke_run().
+ */
+ elem->aux = prog_aux;
+
+ list_add_tail(&elem->list, &aux->poke_progs);
+out:
+ mutex_unlock(&aux->poke_mutex);
+ return ret;
+}
+
+static void prog_array_map_poke_untrack(struct bpf_map *map,
+ struct bpf_prog_aux *prog_aux)
+{
+ struct prog_poke_elem *elem, *tmp;
+ struct bpf_array_aux *aux;
+
+ aux = container_of(map, struct bpf_array, map)->aux;
+ mutex_lock(&aux->poke_mutex);
+ list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
+ if (elem->aux == prog_aux) {
+ list_del_init(&elem->list);
+ kfree(elem);
+ break;
+ }
+ }
+ mutex_unlock(&aux->poke_mutex);
+}
+
+static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
+ struct bpf_prog *old,
+ struct bpf_prog *new)
+{
+ struct prog_poke_elem *elem;
+ struct bpf_array_aux *aux;
+
+ aux = container_of(map, struct bpf_array, map)->aux;
+ WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));
+
+ list_for_each_entry(elem, &aux->poke_progs, list) {
+ struct bpf_jit_poke_descriptor *poke;
+ int i, ret;
+
+ for (i = 0; i < elem->aux->size_poke_tab; i++) {
+ poke = &elem->aux->poke_tab[i];
+
+ /* Few things to be aware of:
+ *
+ * 1) We can only ever access aux in this context, but
+ * not aux->prog since it might not be stable yet and
+ * there could be danger of use after free otherwise.
+ * 2) Initially when we start tracking aux, the program
+ * is not JITed yet and also does not have a kallsyms
+ * entry. We skip these as poke->ip_stable is not
+ * active yet. The JIT will do the final fixup before
+ * setting it stable. The various poke->ip_stable are
+ * successively activated, so tail call updates can
+ * arrive from here while JIT is still finishing its
+ * final fixup for non-activated poke entries.
+ * 3) On program teardown, the program's kallsym entry gets
+ * removed out of RCU callback, but we can only untrack
+ * from sleepable context, therefore bpf_arch_text_poke()
+ * might not see that this is in BPF text section and
+ * bails out with -EINVAL. As these are unreachable since
+ * RCU grace period already passed, we simply skip them.
+ * 4) Also programs reaching refcount of zero while patching
+ * is in progress is okay since we're protected under
+ * poke_mutex and untrack the programs before the JIT
+ * buffer is freed. When we're still in the middle of
+ * patching and suddenly kallsyms entry of the program
+ * gets evicted, we just skip the rest which is fine due
+ * to point 3).
+ * 5) Any other error happening below from bpf_arch_text_poke()
+ * is a unexpected bug.
+ */
+ if (!READ_ONCE(poke->ip_stable))
+ continue;
+ if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
+ continue;
+ if (poke->tail_call.map != map ||
+ poke->tail_call.key != key)
+ continue;
+
+ ret = bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP,
+ old ? (u8 *)old->bpf_func +
+ poke->adj_off : NULL,
+ new ? (u8 *)new->bpf_func +
+ poke->adj_off : NULL);
+ BUG_ON(ret < 0 && ret != -EINVAL);
+ }
+ }
+}
+
+static void prog_array_map_clear_deferred(struct work_struct *work)
+{
+ struct bpf_map *map = container_of(work, struct bpf_array_aux,
+ work)->map;
+ bpf_fd_array_map_clear(map);
+ bpf_map_put(map);
+}
+
+static void prog_array_map_clear(struct bpf_map *map)
+{
+ struct bpf_array_aux *aux = container_of(map, struct bpf_array,
+ map)->aux;
+ bpf_map_inc(map);
+ schedule_work(&aux->work);
+}
+
+static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_array_aux *aux;
+ struct bpf_map *map;
+
+ aux = kzalloc(sizeof(*aux), GFP_KERNEL);
+ if (!aux)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_WORK(&aux->work, prog_array_map_clear_deferred);
+ INIT_LIST_HEAD(&aux->poke_progs);
+ mutex_init(&aux->poke_mutex);
+
+ map = array_map_alloc(attr);
+ if (IS_ERR(map)) {
+ kfree(aux);
+ return map;
+ }
+
+ container_of(map, struct bpf_array, map)->aux = aux;
+ aux->map = map;
+
+ return map;
+}
+
+static void prog_array_map_free(struct bpf_map *map)
+{
+ struct prog_poke_elem *elem, *tmp;
+ struct bpf_array_aux *aux;
+
+ aux = container_of(map, struct bpf_array, map)->aux;
+ list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
+ list_del_init(&elem->list);
+ kfree(elem);
+ }
+ kfree(aux);
+ fd_array_map_free(map);
+}
+
const struct bpf_map_ops prog_array_map_ops = {
.map_alloc_check = fd_array_map_alloc_check,
- .map_alloc = array_map_alloc,
- .map_free = fd_array_map_free,
+ .map_alloc = prog_array_map_alloc,
+ .map_free = prog_array_map_free,
+ .map_poke_track = prog_array_map_poke_track,
+ .map_poke_untrack = prog_array_map_poke_untrack,
+ .map_poke_run = prog_array_map_poke_run,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = prog_fd_array_get_ptr,
.map_fd_put_ptr = prog_fd_array_put_ptr,
.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
- .map_release_uref = bpf_fd_array_map_clear,
+ .map_release_uref = prog_array_map_clear,
.map_seq_show_elem = prog_array_map_seq_show_elem,
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 29c7c06c6bd6..40efde5eedcb 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -2,6 +2,8 @@
/* Copyright (c) 2018 Facebook */
#include <uapi/linux/btf.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/types.h>
#include <linux/seq_file.h>
#include <linux/compiler.h>
@@ -16,6 +18,9 @@
#include <linux/sort.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
+#include <linux/skmsg.h>
+#include <linux/perf_event.h>
+#include <net/sock.h>
/* BTF (BPF Type Format) is the meta data format which describes
* the data types of BPF program/map. Hence, it basically focus
@@ -336,16 +341,6 @@ static bool btf_type_is_fwd(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
}
-static bool btf_type_is_func(const struct btf_type *t)
-{
- return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC;
-}
-
-static bool btf_type_is_func_proto(const struct btf_type *t)
-{
- return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
-}
-
static bool btf_type_nosize(const struct btf_type *t)
{
return btf_type_is_void(t) || btf_type_is_fwd(t) ||
@@ -377,16 +372,6 @@ static bool btf_type_is_array(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
}
-static bool btf_type_is_ptr(const struct btf_type *t)
-{
- return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
-}
-
-static bool btf_type_is_int(const struct btf_type *t)
-{
- return BTF_INFO_KIND(t->info) == BTF_KIND_INT;
-}
-
static bool btf_type_is_var(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
@@ -698,6 +683,13 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
if (!bpf_verifier_log_needed(log))
return;
+ /* btf verifier prints all types it is processing via
+ * btf_verifier_log_type(..., fmt = NULL).
+ * Skip those prints for in-kernel BTF verification.
+ */
+ if (log->level == BPF_LOG_KERNEL && !fmt)
+ return;
+
__btf_verifier_log(log, "[%u] %s %s%s",
env->log_type_id,
btf_kind_str[kind],
@@ -735,6 +727,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
if (!bpf_verifier_log_needed(log))
return;
+ if (log->level == BPF_LOG_KERNEL && !fmt)
+ return;
/* The CHECK_META phase already did a btf dump.
*
* If member is logged again, it must hit an error in
@@ -777,6 +771,8 @@ static void btf_verifier_log_vsi(struct btf_verifier_env *env,
if (!bpf_verifier_log_needed(log))
return;
+ if (log->level == BPF_LOG_KERNEL && !fmt)
+ return;
if (env->phase != CHECK_META)
btf_verifier_log_type(env, datasec_type, NULL);
@@ -802,6 +798,8 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env,
if (!bpf_verifier_log_needed(log))
return;
+ if (log->level == BPF_LOG_KERNEL)
+ return;
hdr = &btf->hdr;
__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
__btf_verifier_log(log, "version: %u\n", hdr->version);
@@ -1043,6 +1041,82 @@ static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env)
return env->top_stack ? &env->stack[env->top_stack - 1] : NULL;
}
+/* Resolve the size of a passed-in "type"
+ *
+ * type: is an array (e.g. u32 array[x][y])
+ * return type: type "u32[x][y]", i.e. BTF_KIND_ARRAY,
+ * *type_size: (x * y * sizeof(u32)). Hence, *type_size always
+ * corresponds to the return type.
+ * *elem_type: u32
+ * *total_nelems: (x * y). Hence, individual elem size is
+ * (*type_size / *total_nelems)
+ *
+ * type: is not an array (e.g. const struct X)
+ * return type: type "struct X"
+ * *type_size: sizeof(struct X)
+ * *elem_type: same as return type ("struct X")
+ * *total_nelems: 1
+ */
+static const struct btf_type *
+btf_resolve_size(const struct btf *btf, const struct btf_type *type,
+ u32 *type_size, const struct btf_type **elem_type,
+ u32 *total_nelems)
+{
+ const struct btf_type *array_type = NULL;
+ const struct btf_array *array;
+ u32 i, size, nelems = 1;
+
+ for (i = 0; i < MAX_RESOLVE_DEPTH; i++) {
+ switch (BTF_INFO_KIND(type->info)) {
+ /* type->size can be used */
+ case BTF_KIND_INT:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ size = type->size;
+ goto resolved;
+
+ case BTF_KIND_PTR:
+ size = sizeof(void *);
+ goto resolved;
+
+ /* Modifiers */
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ type = btf_type_by_id(btf, type->type);
+ break;
+
+ case BTF_KIND_ARRAY:
+ if (!array_type)
+ array_type = type;
+ array = btf_type_array(type);
+ if (nelems && array->nelems > U32_MAX / nelems)
+ return ERR_PTR(-EINVAL);
+ nelems *= array->nelems;
+ type = btf_type_by_id(btf, array->type);
+ break;
+
+ /* type without size */
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ return ERR_PTR(-EINVAL);
+
+resolved:
+ if (nelems && size > U32_MAX / nelems)
+ return ERR_PTR(-EINVAL);
+
+ *type_size = nelems * size;
+ *total_nelems = nelems;
+ *elem_type = type;
+
+ return array_type ? : type;
+}
+
/* The input param "type_id" must point to a needs_resolve type */
static const struct btf_type *btf_type_id_resolve(const struct btf *btf,
u32 *type_id)
@@ -2405,7 +2479,8 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
-
+ if (env->log.level == BPF_LOG_KERNEL)
+ continue;
btf_verifier_log(env, "\t%s val=%d\n",
__btf_name_by_offset(btf, enums[i].name_off),
enums[i].val);
@@ -3367,6 +3442,685 @@ errout:
return ERR_PTR(err);
}
+extern char __weak _binary__btf_vmlinux_bin_start[];
+extern char __weak _binary__btf_vmlinux_bin_end[];
+extern struct btf *btf_vmlinux;
+
+#define BPF_MAP_TYPE(_id, _ops)
+static union {
+ struct bpf_ctx_convert {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ prog_ctx_type _id##_prog; \
+ kern_ctx_type _id##_kern;
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+ } *__t;
+ /* 't' is written once under lock. Read many times. */
+ const struct btf_type *t;
+} bpf_ctx_convert;
+enum {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ __ctx_convert##_id,
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+};
+static u8 bpf_ctx_convert_map[] = {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ [_id] = __ctx_convert##_id,
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+};
+#undef BPF_MAP_TYPE
+
+static const struct btf_member *
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type)
+{
+ const struct btf_type *conv_struct;
+ const struct btf_type *ctx_struct;
+ const struct btf_member *ctx_type;
+ const char *tname, *ctx_tname;
+
+ conv_struct = bpf_ctx_convert.t;
+ if (!conv_struct) {
+ bpf_log(log, "btf_vmlinux is malformed\n");
+ return NULL;
+ }
+ t = btf_type_by_id(btf, t->type);
+ while (btf_type_is_modifier(t))
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_struct(t)) {
+ /* Only pointer to struct is supported for now.
+ * That means that BPF_PROG_TYPE_TRACEPOINT with BTF
+ * is not supported yet.
+ * BPF_PROG_TYPE_RAW_TRACEPOINT is fine.
+ */
+ bpf_log(log, "BPF program ctx type is not a struct\n");
+ return NULL;
+ }
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (!tname) {
+ bpf_log(log, "BPF program ctx struct doesn't have a name\n");
+ return NULL;
+ }
+ /* prog_type is valid bpf program type. No need for bounds check. */
+ ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2;
+ /* ctx_struct is a pointer to prog_ctx_type in vmlinux.
+ * Like 'struct __sk_buff'
+ */
+ ctx_struct = btf_type_by_id(btf_vmlinux, ctx_type->type);
+ if (!ctx_struct)
+ /* should not happen */
+ return NULL;
+ ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off);
+ if (!ctx_tname) {
+ /* should not happen */
+ bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n");
+ return NULL;
+ }
+ /* only compare that prog's ctx type name is the same as
+ * kernel expects. No need to compare field by field.
+ * It's ok for bpf prog to do:
+ * struct __sk_buff {};
+ * int socket_filter_bpf_prog(struct __sk_buff *skb)
+ * { // no fields of skb are ever used }
+ */
+ if (strcmp(ctx_tname, tname))
+ return NULL;
+ return ctx_type;
+}
+
+static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
+ struct btf *btf,
+ const struct btf_type *t,
+ enum bpf_prog_type prog_type)
+{
+ const struct btf_member *prog_ctx_type, *kern_ctx_type;
+
+ prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type);
+ if (!prog_ctx_type)
+ return -ENOENT;
+ kern_ctx_type = prog_ctx_type + 1;
+ return kern_ctx_type->type;
+}
+
+struct btf *btf_parse_vmlinux(void)
+{
+ struct btf_verifier_env *env = NULL;
+ struct bpf_verifier_log *log;
+ struct btf *btf = NULL;
+ int err, i;
+
+ env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
+ if (!env)
+ return ERR_PTR(-ENOMEM);
+
+ log = &env->log;
+ log->level = BPF_LOG_KERNEL;
+
+ btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
+ if (!btf) {
+ err = -ENOMEM;
+ goto errout;
+ }
+ env->btf = btf;
+
+ btf->data = _binary__btf_vmlinux_bin_start;
+ btf->data_size = _binary__btf_vmlinux_bin_end -
+ _binary__btf_vmlinux_bin_start;
+
+ err = btf_parse_hdr(env);
+ if (err)
+ goto errout;
+
+ btf->nohdr_data = btf->data + btf->hdr.hdr_len;
+
+ err = btf_parse_str_sec(env);
+ if (err)
+ goto errout;
+
+ err = btf_check_all_metas(env);
+ if (err)
+ goto errout;
+
+ /* find struct bpf_ctx_convert for type checking later */
+ for (i = 1; i <= btf->nr_types; i++) {
+ const struct btf_type *t;
+ const char *tname;
+
+ t = btf_type_by_id(btf, i);
+ if (!__btf_type_is_struct(t))
+ continue;
+ tname = __btf_name_by_offset(btf, t->name_off);
+ if (!strcmp(tname, "bpf_ctx_convert")) {
+ /* btf_parse_vmlinux() runs under bpf_verifier_lock */
+ bpf_ctx_convert.t = t;
+ break;
+ }
+ }
+ if (i > btf->nr_types) {
+ err = -ENOENT;
+ goto errout;
+ }
+
+ btf_verifier_env_free(env);
+ refcount_set(&btf->refcnt, 1);
+ return btf;
+
+errout:
+ btf_verifier_env_free(env);
+ if (btf) {
+ kvfree(btf->types);
+ kfree(btf);
+ }
+ return ERR_PTR(err);
+}
+
+struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
+{
+ struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+
+ if (tgt_prog) {
+ return tgt_prog->aux->btf;
+ } else {
+ return btf_vmlinux;
+ }
+}
+
+bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ const struct btf_type *t = prog->aux->attach_func_proto;
+ struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+ struct btf *btf = bpf_prog_get_target_btf(prog);
+ const char *tname = prog->aux->attach_func_name;
+ struct bpf_verifier_log *log = info->log;
+ const struct btf_param *args;
+ u32 nr_args, arg;
+ int ret;
+
+ if (off % 8) {
+ bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
+ tname, off);
+ return false;
+ }
+ arg = off / 8;
+ args = (const struct btf_param *)(t + 1);
+ /* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */
+ nr_args = t ? btf_type_vlen(t) : 5;
+ if (prog->aux->attach_btf_trace) {
+ /* skip first 'void *__data' argument in btf_trace_##name typedef */
+ args++;
+ nr_args--;
+ }
+
+ if (prog->expected_attach_type == BPF_TRACE_FEXIT &&
+ arg == nr_args) {
+ if (!t)
+ /* Default prog with 5 args. 6th arg is retval. */
+ return true;
+ /* function return type */
+ t = btf_type_by_id(btf, t->type);
+ } else if (arg >= nr_args) {
+ bpf_log(log, "func '%s' doesn't have %d-th argument\n",
+ tname, arg + 1);
+ return false;
+ } else {
+ if (!t)
+ /* Default prog with 5 args */
+ return true;
+ t = btf_type_by_id(btf, args[arg].type);
+ }
+ /* skip modifiers */
+ while (btf_type_is_modifier(t))
+ t = btf_type_by_id(btf, t->type);
+ if (btf_type_is_int(t))
+ /* accessing a scalar */
+ return true;
+ if (!btf_type_is_ptr(t)) {
+ bpf_log(log,
+ "func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
+ tname, arg,
+ __btf_name_by_offset(btf, t->name_off),
+ btf_kind_str[BTF_INFO_KIND(t->info)]);
+ return false;
+ }
+ if (t->type == 0)
+ /* This is a pointer to void.
+ * It is the same as scalar from the verifier safety pov.
+ * No further pointer walking is allowed.
+ */
+ return true;
+
+ /* this is a pointer to another type */
+ info->reg_type = PTR_TO_BTF_ID;
+ info->btf_id = t->type;
+
+ if (tgt_prog) {
+ ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type);
+ if (ret > 0) {
+ info->btf_id = ret;
+ return true;
+ } else {
+ return false;
+ }
+ }
+ t = btf_type_by_id(btf, t->type);
+ /* skip modifiers */
+ while (btf_type_is_modifier(t))
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_struct(t)) {
+ bpf_log(log,
+ "func '%s' arg%d type %s is not a struct\n",
+ tname, arg, btf_kind_str[BTF_INFO_KIND(t->info)]);
+ return false;
+ }
+ bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
+ tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)],
+ __btf_name_by_offset(btf, t->name_off));
+ return true;
+}
+
+int btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf_type *t, int off, int size,
+ enum bpf_access_type atype,
+ u32 *next_btf_id)
+{
+ u32 i, moff, mtrue_end, msize = 0, total_nelems = 0;
+ const struct btf_type *mtype, *elem_type = NULL;
+ const struct btf_member *member;
+ const char *tname, *mname;
+
+again:
+ tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
+ if (!btf_type_is_struct(t)) {
+ bpf_log(log, "Type '%s' is not a struct", tname);
+ return -EINVAL;
+ }
+
+ for_each_member(i, t, member) {
+ if (btf_member_bitfield_size(t, member))
+ /* bitfields are not supported yet */
+ continue;
+
+ /* offset of the field in bytes */
+ moff = btf_member_bit_offset(t, member) / 8;
+ if (off + size <= moff)
+ /* won't find anything, field is already too far */
+ break;
+ /* In case of "off" is pointing to holes of a struct */
+ if (off < moff)
+ continue;
+
+ /* type of the field */
+ mtype = btf_type_by_id(btf_vmlinux, member->type);
+ mname = __btf_name_by_offset(btf_vmlinux, member->name_off);
+
+ mtype = btf_resolve_size(btf_vmlinux, mtype, &msize,
+ &elem_type, &total_nelems);
+ if (IS_ERR(mtype)) {
+ bpf_log(log, "field %s doesn't have size\n", mname);
+ return -EFAULT;
+ }
+
+ mtrue_end = moff + msize;
+ if (off >= mtrue_end)
+ /* no overlap with member, keep iterating */
+ continue;
+
+ if (btf_type_is_array(mtype)) {
+ u32 elem_idx;
+
+ /* btf_resolve_size() above helps to
+ * linearize a multi-dimensional array.
+ *
+ * The logic here is treating an array
+ * in a struct as the following way:
+ *
+ * struct outer {
+ * struct inner array[2][2];
+ * };
+ *
+ * looks like:
+ *
+ * struct outer {
+ * struct inner array_elem0;
+ * struct inner array_elem1;
+ * struct inner array_elem2;
+ * struct inner array_elem3;
+ * };
+ *
+ * When accessing outer->array[1][0], it moves
+ * moff to "array_elem2", set mtype to
+ * "struct inner", and msize also becomes
+ * sizeof(struct inner). Then most of the
+ * remaining logic will fall through without
+ * caring the current member is an array or
+ * not.
+ *
+ * Unlike mtype/msize/moff, mtrue_end does not
+ * change. The naming difference ("_true") tells
+ * that it is not always corresponding to
+ * the current mtype/msize/moff.
+ * It is the true end of the current
+ * member (i.e. array in this case). That
+ * will allow an int array to be accessed like
+ * a scratch space,
+ * i.e. allow access beyond the size of
+ * the array's element as long as it is
+ * within the mtrue_end boundary.
+ */
+
+ /* skip empty array */
+ if (moff == mtrue_end)
+ continue;
+
+ msize /= total_nelems;
+ elem_idx = (off - moff) / msize;
+ moff += elem_idx * msize;
+ mtype = elem_type;
+ }
+
+ /* the 'off' we're looking for is either equal to start
+ * of this field or inside of this struct
+ */
+ if (btf_type_is_struct(mtype)) {
+ /* our field must be inside that union or struct */
+ t = mtype;
+
+ /* adjust offset we're looking for */
+ off -= moff;
+ goto again;
+ }
+
+ if (btf_type_is_ptr(mtype)) {
+ const struct btf_type *stype;
+
+ if (msize != size || off != moff) {
+ bpf_log(log,
+ "cannot access ptr member %s with moff %u in struct %s with off %u size %u\n",
+ mname, moff, tname, off, size);
+ return -EACCES;
+ }
+
+ stype = btf_type_by_id(btf_vmlinux, mtype->type);
+ /* skip modifiers */
+ while (btf_type_is_modifier(stype))
+ stype = btf_type_by_id(btf_vmlinux, stype->type);
+ if (btf_type_is_struct(stype)) {
+ *next_btf_id = mtype->type;
+ return PTR_TO_BTF_ID;
+ }
+ }
+
+ /* Allow more flexible access within an int as long as
+ * it is within mtrue_end.
+ * Since mtrue_end could be the end of an array,
+ * that also allows using an array of int as a scratch
+ * space. e.g. skb->cb[].
+ */
+ if (off + size > mtrue_end) {
+ bpf_log(log,
+ "access beyond the end of member %s (mend:%u) in struct %s with off %u size %u\n",
+ mname, mtrue_end, tname, off, size);
+ return -EACCES;
+ }
+
+ return SCALAR_VALUE;
+ }
+ bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off);
+ return -EINVAL;
+}
+
+static int __btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn,
+ int arg)
+{
+ char fnname[KSYM_SYMBOL_LEN + 4] = "btf_";
+ const struct btf_param *args;
+ const struct btf_type *t;
+ const char *tname, *sym;
+ u32 btf_id, i;
+
+ if (IS_ERR(btf_vmlinux)) {
+ bpf_log(log, "btf_vmlinux is malformed\n");
+ return -EINVAL;
+ }
+
+ sym = kallsyms_lookup((long)fn, NULL, NULL, NULL, fnname + 4);
+ if (!sym) {
+ bpf_log(log, "kernel doesn't have kallsyms\n");
+ return -EFAULT;
+ }
+
+ for (i = 1; i <= btf_vmlinux->nr_types; i++) {
+ t = btf_type_by_id(btf_vmlinux, i);
+ if (BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF)
+ continue;
+ tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
+ if (!strcmp(tname, fnname))
+ break;
+ }
+ if (i > btf_vmlinux->nr_types) {
+ bpf_log(log, "helper %s type is not found\n", fnname);
+ return -ENOENT;
+ }
+
+ t = btf_type_by_id(btf_vmlinux, t->type);
+ if (!btf_type_is_ptr(t))
+ return -EFAULT;
+ t = btf_type_by_id(btf_vmlinux, t->type);
+ if (!btf_type_is_func_proto(t))
+ return -EFAULT;
+
+ args = (const struct btf_param *)(t + 1);
+ if (arg >= btf_type_vlen(t)) {
+ bpf_log(log, "bpf helper %s doesn't have %d-th argument\n",
+ fnname, arg);
+ return -EINVAL;
+ }
+
+ t = btf_type_by_id(btf_vmlinux, args[arg].type);
+ if (!btf_type_is_ptr(t) || !t->type) {
+ /* anything but the pointer to struct is a helper config bug */
+ bpf_log(log, "ARG_PTR_TO_BTF is misconfigured\n");
+ return -EFAULT;
+ }
+ btf_id = t->type;</