summaryrefslogtreecommitdiffstats
path: root/kernel/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/arraymap.c53
-rw-r--r--kernel/bpf/btf.c419
-rw-r--r--kernel/bpf/cgroup.c370
-rw-r--r--kernel/bpf/core.c14
-rw-r--r--kernel/bpf/cpumap.c53
-rw-r--r--kernel/bpf/disasm.c5
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/helpers.c131
-rw-r--r--kernel/bpf/local_storage.c6
-rw-r--r--kernel/bpf/lpm_trie.c3
-rw-r--r--kernel/bpf/queue_stack_maps.c6
-rw-r--r--kernel/bpf/syscall.c164
-rw-r--r--kernel/bpf/verifier.c618
13 files changed, 1619 insertions, 229 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c72e0d8e1e65..584636c9e2eb 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -22,7 +22,7 @@
#include "map_in_map.h"
#define ARRAY_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
static void bpf_array_free_percpu(struct bpf_array *array)
{
@@ -63,6 +63,7 @@ int array_map_alloc_check(union bpf_attr *attr)
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size == 0 ||
attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags) ||
(percpu && numa_node != NUMA_NO_NODE))
return -EINVAL;
@@ -160,6 +161,36 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
return array->value + array->elem_size * (index & array->index_mask);
}
+static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
+ u32 off)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+ if (map->max_entries != 1)
+ return -ENOTSUPP;
+ if (off >= map->value_size)
+ return -EINVAL;
+
+ *imm = (unsigned long)array->value;
+ return 0;
+}
+
+static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
+ u32 *off)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ u64 base = (unsigned long)array->value;
+ u64 range = array->elem_size;
+
+ if (map->max_entries != 1)
+ return -ENOTSUPP;
+ if (imm < base || imm >= base + range)
+ return -ENOENT;
+
+ *off = imm - base;
+ return 0;
+}
+
/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
@@ -360,7 +391,8 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
return;
}
- seq_printf(m, "%u: ", *(u32 *)key);
+ if (map->btf_key_type_id)
+ seq_printf(m, "%u: ", *(u32 *)key);
btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n");
@@ -397,6 +429,18 @@ static int array_map_check_btf(const struct bpf_map *map,
{
u32 int_data;
+ /* One exception for keyless BTF: .bss/.data/.rodata map */
+ if (btf_type_is_void(key_type)) {
+ if (map->map_type != BPF_MAP_TYPE_ARRAY ||
+ map->max_entries != 1)
+ return -EINVAL;
+
+ if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
+ return -EINVAL;
+
+ return 0;
+ }
+
if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
return -EINVAL;
@@ -419,6 +463,8 @@ const struct bpf_map_ops array_map_ops = {
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_gen_lookup = array_map_gen_lookup,
+ .map_direct_value_addr = array_map_direct_value_addr,
+ .map_direct_value_meta = array_map_direct_value_meta,
.map_seq_show_elem = array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
};
@@ -440,6 +486,9 @@ static int fd_array_map_alloc_check(union bpf_attr *attr)
/* only file descriptors can be stored in this type of map */
if (attr->value_size != sizeof(u32))
return -EINVAL;
+ /* Program read-only/write-only not supported for special maps yet. */
+ if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
+ return -EINVAL;
return array_map_alloc_check(attr);
}
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index bd3921b1514b..cad09858a5f2 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -185,6 +185,16 @@
i < btf_type_vlen(struct_type); \
i++, member++)
+#define for_each_vsi(i, struct_type, member) \
+ for (i = 0, member = btf_type_var_secinfo(struct_type); \
+ i < btf_type_vlen(struct_type); \
+ i++, member++)
+
+#define for_each_vsi_from(i, from, struct_type, member) \
+ for (i = from, member = btf_type_var_secinfo(struct_type) + from; \
+ i < btf_type_vlen(struct_type); \
+ i++, member++)
+
static DEFINE_IDR(btf_idr);
static DEFINE_SPINLOCK(btf_idr_lock);
@@ -262,6 +272,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_RESTRICT] = "RESTRICT",
[BTF_KIND_FUNC] = "FUNC",
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
+ [BTF_KIND_VAR] = "VAR",
+ [BTF_KIND_DATASEC] = "DATASEC",
};
struct btf_kind_operations {
@@ -314,7 +326,7 @@ static bool btf_type_is_modifier(const struct btf_type *t)
return false;
}
-static bool btf_type_is_void(const struct btf_type *t)
+bool btf_type_is_void(const struct btf_type *t)
{
return t == &btf_void;
}
@@ -375,13 +387,36 @@ static bool btf_type_is_int(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_INT;
}
+static bool btf_type_is_var(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
+}
+
+static bool btf_type_is_datasec(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
+}
+
+/* Types that act only as a source, not sink or intermediate
+ * type when resolving.
+ */
+static bool btf_type_is_resolve_source_only(const struct btf_type *t)
+{
+ return btf_type_is_var(t) ||
+ btf_type_is_datasec(t);
+}
+
/* What types need to be resolved?
*
* btf_type_is_modifier() is an obvious one.
*
* btf_type_is_struct() because its member refers to
* another type (through member->type).
-
+ *
+ * btf_type_is_var() because the variable refers to
+ * another type. btf_type_is_datasec() holds multiple
+ * btf_type_is_var() types that need resolving.
+ *
* btf_type_is_array() because its element (array->type)
* refers to another type. Array can be thought of a
* special case of struct while array just has the same
@@ -390,9 +425,11 @@ static bool btf_type_is_int(const struct btf_type *t)
static bool btf_type_needs_resolve(const struct btf_type *t)
{
return btf_type_is_modifier(t) ||
- btf_type_is_ptr(t) ||
- btf_type_is_struct(t) ||
- btf_type_is_array(t);
+ btf_type_is_ptr(t) ||
+ btf_type_is_struct(t) ||
+ btf_type_is_array(t) ||
+ btf_type_is_var(t) ||
+ btf_type_is_datasec(t);
}
/* t->size can be used */
@@ -403,6 +440,7 @@ static bool btf_type_has_size(const struct btf_type *t)
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
+ case BTF_KIND_DATASEC:
return true;
}
@@ -467,6 +505,16 @@ static const struct btf_enum *btf_type_enum(const struct btf_type *t)
return (const struct btf_enum *)(t + 1);
}
+static const struct btf_var *btf_type_var(const struct btf_type *t)
+{
+ return (const struct btf_var *)(t + 1);
+}
+
+static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
+{
+ return (const struct btf_var_secinfo *)(t + 1);
+}
+
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
return kind_ops[BTF_INFO_KIND(t->info)];
@@ -478,23 +526,31 @@ static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
offset < btf->hdr.str_len;
}
-/* Only C-style identifier is permitted. This can be relaxed if
- * necessary.
- */
-static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
+static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
+{
+ if ((first ? !isalpha(c) :
+ !isalnum(c)) &&
+ c != '_' &&
+ ((c == '.' && !dot_ok) ||
+ c != '.'))
+ return false;
+ return true;
+}
+
+static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
{
/* offset must be valid */
const char *src = &btf->strings[offset];
const char *src_limit;
- if (!isalpha(*src) && *src != '_')
+ if (!__btf_name_char_ok(*src, true, dot_ok))
return false;
/* set a limit on identifier length */
src_limit = src + KSYM_NAME_LEN;
src++;
while (*src && src < src_limit) {
- if (!isalnum(*src) && *src != '_')
+ if (!__btf_name_char_ok(*src, false, dot_ok))
return false;
src++;
}
@@ -502,6 +558,19 @@ static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
return !*src;
}
+/* Only C-style identifier is permitted. This can be relaxed if
+ * necessary.
+ */
+static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
+{
+ return __btf_name_valid(btf, offset, false);
+}
+
+static bool btf_name_valid_section(const struct btf *btf, u32 offset)
+{
+ return __btf_name_valid(btf, offset, true);
+}
+
static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
{
if (!offset)
@@ -697,6 +766,32 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
__btf_verifier_log(log, "\n");
}
+__printf(4, 5)
+static void btf_verifier_log_vsi(struct btf_verifier_env *env,
+ const struct btf_type *datasec_type,
+ const struct btf_var_secinfo *vsi,
+ const char *fmt, ...)
+{
+ struct bpf_verifier_log *log = &env->log;
+ va_list args;
+
+ if (!bpf_verifier_log_needed(log))
+ return;
+ if (env->phase != CHECK_META)
+ btf_verifier_log_type(env, datasec_type, NULL);
+
+ __btf_verifier_log(log, "\t type_id=%u offset=%u size=%u",
+ vsi->type, vsi->offset, vsi->size);
+ if (fmt && *fmt) {
+ __btf_verifier_log(log, " ");
+ va_start(args, fmt);
+ bpf_verifier_vlog(log, fmt, args);
+ va_end(args);
+ }
+
+ __btf_verifier_log(log, "\n");
+}
+
static void btf_verifier_log_hdr(struct btf_verifier_env *env,
u32 btf_data_size)
{
@@ -974,7 +1069,8 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
} else if (btf_type_is_ptr(size_type)) {
size = sizeof(void *);
} else {
- if (WARN_ON_ONCE(!btf_type_is_modifier(size_type)))
+ if (WARN_ON_ONCE(!btf_type_is_modifier(size_type) &&
+ !btf_type_is_var(size_type)))
return NULL;
size = btf->resolved_sizes[size_type_id];
@@ -1509,7 +1605,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
u32 next_type_size = 0;
next_type = btf_type_by_id(btf, next_type_id);
- if (!next_type) {
+ if (!next_type || btf_type_is_resolve_source_only(next_type)) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
@@ -1542,6 +1638,53 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
return 0;
}
+static int btf_var_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_type *next_type;
+ const struct btf_type *t = v->t;
+ u32 next_type_id = t->type;
+ struct btf *btf = env->btf;
+ u32 next_type_size;
+
+ next_type = btf_type_by_id(btf, next_type_id);
+ if (!next_type || btf_type_is_resolve_source_only(next_type)) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ if (!env_type_is_resolve_sink(env, next_type) &&
+ !env_type_is_resolved(env, next_type_id))
+ return env_stack_push(env, next_type, next_type_id);
+
+ if (btf_type_is_modifier(next_type)) {
+ const struct btf_type *resolved_type;
+ u32 resolved_type_id;
+
+ resolved_type_id = next_type_id;
+ resolved_type = btf_type_id_resolve(btf, &resolved_type_id);
+
+ if (btf_type_is_ptr(resolved_type) &&
+ !env_type_is_resolve_sink(env, resolved_type) &&
+ !env_type_is_resolved(env, resolved_type_id))
+ return env_stack_push(env, resolved_type,
+ resolved_type_id);
+ }
+
+ /* We must resolve to something concrete at this point, no
+ * forward types or similar that would resolve to size of
+ * zero is allowed.
+ */
+ if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ env_stack_pop_resolved(env, next_type_id, next_type_size);
+
+ return 0;
+}
+
static int btf_ptr_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v)
{
@@ -1551,7 +1694,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
struct btf *btf = env->btf;
next_type = btf_type_by_id(btf, next_type_id);
- if (!next_type) {
+ if (!next_type || btf_type_is_resolve_source_only(next_type)) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
@@ -1609,6 +1752,15 @@ static void btf_modifier_seq_show(const struct btf *btf,
btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
}
+static void btf_var_seq_show(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offset,
+ struct seq_file *m)
+{
+ t = btf_type_id_resolve(btf, &type_id);
+
+ btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
+}
+
static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t,
u32 type_id, void *data, u8 bits_offset,
struct seq_file *m)
@@ -1776,7 +1928,8 @@ static int btf_array_resolve(struct btf_verifier_env *env,
/* Check array->index_type */
index_type_id = array->index_type;
index_type = btf_type_by_id(btf, index_type_id);
- if (btf_type_nosize_or_null(index_type)) {
+ if (btf_type_is_resolve_source_only(index_type) ||
+ btf_type_nosize_or_null(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
@@ -1795,7 +1948,8 @@ static int btf_array_resolve(struct btf_verifier_env *env,
/* Check array->type */
elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id);
- if (btf_type_nosize_or_null(elem_type)) {
+ if (btf_type_is_resolve_source_only(elem_type) ||
+ btf_type_nosize_or_null(elem_type)) {
btf_verifier_log_type(env, v->t,
"Invalid elem");
return -EINVAL;
@@ -2016,7 +2170,8 @@ static int btf_struct_resolve(struct btf_verifier_env *env,
const struct btf_type *member_type = btf_type_by_id(env->btf,
member_type_id);
- if (btf_type_nosize_or_null(member_type)) {
+ if (btf_type_is_resolve_source_only(member_type) ||
+ btf_type_nosize_or_null(member_type)) {
btf_verifier_log_member(env, v->t, member,
"Invalid member");
return -EINVAL;
@@ -2411,6 +2566,222 @@ static struct btf_kind_operations func_ops = {
.seq_show = btf_df_seq_show,
};
+static s32 btf_var_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ const struct btf_var *var;
+ u32 meta_needed = sizeof(*var);
+
+ if (meta_left < meta_needed) {
+ btf_verifier_log_basic(env, t,
+ "meta_left:%u meta_needed:%u",
+ meta_left, meta_needed);
+ return -EINVAL;
+ }
+
+ if (btf_type_vlen(t)) {
+ btf_verifier_log_type(env, t, "vlen != 0");
+ return -EINVAL;
+ }
+
+ if (btf_type_kflag(t)) {
+ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
+ return -EINVAL;
+ }
+
+ if (!t->name_off ||
+ !__btf_name_valid(env->btf, t->name_off, true)) {
+ btf_verifier_log_type(env, t, "Invalid name");
+ return -EINVAL;
+ }
+
+ /* A var cannot be in type void */
+ if (!t->type || !BTF_TYPE_ID_VALID(t->type)) {
+ btf_verifier_log_type(env, t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ var = btf_type_var(t);
+ if (var->linkage != BTF_VAR_STATIC &&
+ var->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
+ btf_verifier_log_type(env, t, "Linkage not supported");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ return meta_needed;
+}
+
+static void btf_var_log(struct btf_verifier_env *env, const struct btf_type *t)
+{
+ const struct btf_var *var = btf_type_var(t);
+
+ btf_verifier_log(env, "type_id=%u linkage=%u", t->type, var->linkage);
+}
+
+static const struct btf_kind_operations var_ops = {
+ .check_meta = btf_var_check_meta,
+ .resolve = btf_var_resolve,
+ .check_member = btf_df_check_member,
+ .check_kflag_member = btf_df_check_kflag_member,
+ .log_details = btf_var_log,
+ .seq_show = btf_var_seq_show,
+};
+
+static s32 btf_datasec_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ const struct btf_var_secinfo *vsi;
+ u64 last_vsi_end_off = 0, sum = 0;
+ u32 i, meta_needed;
+
+ meta_needed = btf_type_vlen(t) * sizeof(*vsi);
+ if (meta_left < meta_needed) {
+ btf_verifier_log_basic(env, t,
+ "meta_left:%u meta_needed:%u",
+ meta_left, meta_needed);
+ return -EINVAL;
+ }
+
+ if (!btf_type_vlen(t)) {
+ btf_verifier_log_type(env, t, "vlen == 0");
+ return -EINVAL;
+ }
+
+ if (!t->size) {
+ btf_verifier_log_type(env, t, "size == 0");
+ return -EINVAL;
+ }
+
+ if (btf_type_kflag(t)) {
+ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
+ return -EINVAL;
+ }
+
+ if (!t->name_off ||
+ !btf_name_valid_section(env->btf, t->name_off)) {
+ btf_verifier_log_type(env, t, "Invalid name");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ for_each_vsi(i, t, vsi) {
+ /* A var cannot be in type void */
+ if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) {
+ btf_verifier_log_vsi(env, t, vsi,
+ "Invalid type_id");
+ return -EINVAL;
+ }
+
+ if (vsi->offset < last_vsi_end_off || vsi->offset >= t->size) {
+ btf_verifier_log_vsi(env, t, vsi,
+ "Invalid offset");
+ return -EINVAL;
+ }
+
+ if (!vsi->size || vsi->size > t->size) {
+ btf_verifier_log_vsi(env, t, vsi,
+ "Invalid size");
+ return -EINVAL;
+ }
+
+ last_vsi_end_off = vsi->offset + vsi->size;
+ if (last_vsi_end_off > t->size) {
+ btf_verifier_log_vsi(env, t, vsi,
+ "Invalid offset+size");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_vsi(env, t, vsi, NULL);
+ sum += vsi->size;
+ }
+
+ if (t->size < sum) {
+ btf_verifier_log_type(env, t, "Invalid btf_info size");
+ return -EINVAL;
+ }
+
+ return meta_needed;
+}
+
+static int btf_datasec_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_var_secinfo *vsi;
+ struct btf *btf = env->btf;
+ u16 i;
+
+ for_each_vsi_from(i, v->next_member, v->t, vsi) {
+ u32 var_type_id = vsi->type, type_id, type_size = 0;
+ const struct btf_type *var_type = btf_type_by_id(env->btf,
+ var_type_id);
+ if (!var_type || !btf_type_is_var(var_type)) {
+ btf_verifier_log_vsi(env, v->t, vsi,
+ "Not a VAR kind member");
+ return -EINVAL;
+ }
+
+ if (!env_type_is_resolve_sink(env, var_type) &&
+ !env_type_is_resolved(env, var_type_id)) {
+ env_stack_set_next_member(env, i + 1);
+ return env_stack_push(env, var_type, var_type_id);
+ }
+
+ type_id = var_type->type;
+ if (!btf_type_id_size(btf, &type_id, &type_size)) {
+ btf_verifier_log_vsi(env, v->t, vsi, "Invalid type");
+ return -EINVAL;
+ }
+
+ if (vsi->size < type_size) {
+ btf_verifier_log_vsi(env, v->t, vsi, "Invalid size");
+ return -EINVAL;
+ }
+ }
+
+ env_stack_pop_resolved(env, 0, 0);
+ return 0;
+}
+
+static void btf_datasec_log(struct btf_verifier_env *env,
+ const struct btf_type *t)
+{
+ btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
+}
+
+static void btf_datasec_seq_show(const struct btf *btf,
+ const struct btf_type *t, u32 type_id,
+ void *data, u8 bits_offset,
+ struct seq_file *m)
+{
+ const struct btf_var_secinfo *vsi;
+ const struct btf_type *var;
+ u32 i;
+
+ seq_printf(m, "section (\"%s\") = {", __btf_name_by_offset(btf, t->name_off));
+ for_each_vsi(i, t, vsi) {
+ var = btf_type_by_id(btf, vsi->type);
+ if (i)
+ seq_puts(m, ",");
+ btf_type_ops(var)->seq_show(btf, var, vsi->type,
+ data + vsi->offset, bits_offset, m);
+ }
+ seq_puts(m, "}");
+}
+
+static const struct btf_kind_operations datasec_ops = {
+ .check_meta = btf_datasec_check_meta,
+ .resolve = btf_datasec_resolve,
+ .check_member = btf_df_check_member,
+ .check_kflag_member = btf_df_check_kflag_member,
+ .log_details = btf_datasec_log,
+ .seq_show = btf_datasec_seq_show,
+};
+
static int btf_func_proto_check(struct btf_verifier_env *env,
const struct btf_type *t)
{
@@ -2542,6 +2913,8 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
[BTF_KIND_RESTRICT] = &modifier_ops,
[BTF_KIND_FUNC] = &func_ops,
[BTF_KIND_FUNC_PROTO] = &func_proto_ops,
+ [BTF_KIND_VAR] = &var_ops,
+ [BTF_KIND_DATASEC] = &datasec_ops,
};
static s32 btf_check_meta(struct btf_verifier_env *env,
@@ -2622,13 +2995,17 @@ static bool btf_resolve_valid(struct btf_verifier_env *env,
if (!env_type_is_resolved(env, type_id))
return false;
- if (btf_type_is_struct(t))
+ if (btf_type_is_struct(t) || btf_type_is_datasec(t))
return !btf->resolved_ids[type_id] &&
- !btf->resolved_sizes[type_id];
+ !btf->resolved_sizes[type_id];
- if (btf_type_is_modifier(t) || btf_type_is_ptr(t)) {
+ if (btf_type_is_modifier(t) || btf_type_is_ptr(t) ||
+ btf_type_is_var(t)) {
t = btf_type_id_resolve(btf, &type_id);
- return t && !btf_type_is_modifier(t);
+ return t &&
+ !btf_type_is_modifier(t) &&
+ !btf_type_is_var(t) &&
+ !btf_type_is_datasec(t);
}
if (btf_type_is_array(t)) {
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 4e807973aa80..fcde0f7b2585 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -11,7 +11,10 @@
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
+#include <linux/filter.h>
#include <linux/slab.h>
+#include <linux/sysctl.h>
+#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>
@@ -701,7 +704,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
static const struct bpf_func_proto *
-cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
@@ -710,6 +713,12 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
+ case BPF_FUNC_map_push_elem:
+ return &bpf_map_push_elem_proto;
+ case BPF_FUNC_map_pop_elem:
+ return &bpf_map_pop_elem_proto;
+ case BPF_FUNC_map_peek_elem:
+ return &bpf_map_peek_elem_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_local_storage:
@@ -725,6 +734,12 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
}
+static const struct bpf_func_proto *
+cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ return cgroup_base_func_proto(func_id, prog);
+}
+
static bool cgroup_dev_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -762,3 +777,356 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
.get_func_proto = cgroup_dev_func_proto,
.is_valid_access = cgroup_dev_is_valid_access,
};
+
+/**
+ * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
+ *
+ * @head: sysctl table header
+ * @table: sysctl table
+ * @write: sysctl is being read (= 0) or written (= 1)
+ * @buf: pointer to buffer passed by user space
+ * @pcount: value-result argument: value is size of buffer pointed to by @buf,
+ * result is size of @new_buf if program set new value, initial value
+ * otherwise
+ * @ppos: value-result argument: value is position at which read from or write
+ * to sysctl is happening, result is new position if program overrode it,
+ * initial value otherwise
+ * @new_buf: pointer to pointer to new buffer that will be allocated if program
+ * overrides new value provided by user space on sysctl write
+ * NOTE: it's caller responsibility to free *new_buf if it was set
+ * @type: type of program to be executed
+ *
+ * Program is run when sysctl is being accessed, either read or written, and
+ * can allow or deny such access.
+ *
+ * This function will return %-EPERM if an attached program is found and
+ * returned value != 1 during execution. In all other cases 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
+ struct ctl_table *table, int write,
+ void __user *buf, size_t *pcount,
+ loff_t *ppos, void **new_buf,
+ enum bpf_attach_type type)
+{
+ struct bpf_sysctl_kern ctx = {
+ .head = head,
+ .table = table,
+ .write = write,
+ .ppos = ppos,
+ .cur_val = NULL,
+ .cur_len = PAGE_SIZE,
+ .new_val = NULL,
+ .new_len = 0,
+ .new_updated = 0,
+ };
+ struct cgroup *cgrp;
+ int ret;
+
+ ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
+ if (ctx.cur_val) {
+ mm_segment_t old_fs;
+ loff_t pos = 0;
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ if (table->proc_handler(table, 0, (void __user *)ctx.cur_val,
+ &ctx.cur_len, &pos)) {
+ /* Let BPF program decide how to proceed. */
+ ctx.cur_len = 0;
+ }
+ set_fs(old_fs);
+ } else {
+ /* Let BPF program decide how to proceed. */
+ ctx.cur_len = 0;
+ }
+
+ if (write && buf && *pcount) {
+ /* BPF program should be able to override new value with a
+ * buffer bigger than provided by user.
+ */
+ ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
+ ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
+ if (!ctx.new_val ||
+ copy_from_user(ctx.new_val, buf, ctx.new_len))
+ /* Let BPF program decide how to proceed. */
+ ctx.new_len = 0;
+ }
+
+ rcu_read_lock();
+ cgrp = task_dfl_cgroup(current);
+ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
+ rcu_read_unlock();
+
+ kfree(ctx.cur_val);
+
+ if (ret == 1 && ctx.new_updated) {
+ *new_buf = ctx.new_val;
+ *pcount = ctx.new_len;
+ } else {
+ kfree(ctx.new_val);
+ }
+
+ return ret == 1 ? 0 : -EPERM;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
+
+static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
+ size_t *lenp)
+{
+ ssize_t tmp_ret = 0, ret;
+
+ if (dir->header.parent) {
+ tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
+ if (tmp_ret < 0)
+ return tmp_ret;
+ }
+
+ ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
+ if (ret < 0)
+ return ret;
+ *bufp += ret;
+ *lenp -= ret;
+ ret += tmp_ret;
+
+ /* Avoid leading slash. */
+ if (!ret)
+ return ret;
+
+ tmp_ret = strscpy(*bufp, "/", *lenp);
+ if (tmp_ret < 0)
+ return tmp_ret;
+ *bufp += tmp_ret;
+ *lenp -= tmp_ret;
+
+ return ret + tmp_ret;
+}
+
+BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
+ size_t, buf_len, u64, flags)
+{
+ ssize_t tmp_ret = 0, ret;
+
+ if (!buf)
+ return -EINVAL;
+
+ if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
+ if (!ctx->head)
+ return -EINVAL;
+ tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
+ if (tmp_ret < 0)
+ return tmp_ret;
+ }
+
+ ret = strscpy(buf, ctx->table->procname, buf_len);
+
+ return ret < 0 ? ret : tmp_ret + ret;
+}
+
+static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
+ .func = bpf_sysctl_get_name,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+};
+
+static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
+ size_t src_len)
+{
+ if (!dst)
+ return -EINVAL;
+
+ if (!dst_len)
+ return -E2BIG;
+
+ if (!src || !src_len) {
+ memset(dst, 0, dst_len);
+ return -EINVAL;
+ }
+
+ memcpy(dst, src, min(dst_len, src_len));
+
+ if (dst_len > src_len) {
+ memset(dst + src_len, '\0', dst_len - src_len);
+ return src_len;
+ }
+
+ dst[dst_len - 1] = '\0';
+
+ return -E2BIG;
+}
+
+BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
+ char *, buf, size_t, buf_len)
+{
+ return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len);
+}
+
+static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
+ .func = bpf_sysctl_get_current_value,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
+ size_t, buf_len)
+{
+ if (!ctx->write) {
+ if (buf && buf_len)
+ memset(buf, '\0', buf_len);
+ return -EINVAL;
+ }
+ return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
+}
+
+static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
+ .func = bpf_sysctl_get_new_value,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
+ const char *, buf, size_t, buf_len)
+{
+ if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
+ return -EINVAL;
+
+ if (buf_len > PAGE_SIZE - 1)
+ return -E2BIG;
+
+ memcpy(ctx->new_val, buf, buf_len);
+ ctx->new_len = buf_len;
+ ctx->new_updated = 1;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
+ .func = bpf_sysctl_set_new_value,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+static const struct bpf_func_proto *
+sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_strtol:
+ return &bpf_strtol_proto;
+ case BPF_FUNC_strtoul:
+ return &bpf_strtoul_proto;
+ case BPF_FUNC_sysctl_get_name:
+ return &bpf_sysctl_get_name_proto;
+ case BPF_FUNC_sysctl_get_current_value:
+ return &bpf_sysctl_get_current_value_proto;
+ case BPF_FUNC_sysctl_get_new_value:
+ return &bpf_sysctl_get_new_value_proto;
+ case BPF_FUNC_sysctl_set_new_value:
+ return &bpf_sysctl_set_new_value_proto;
+ default:
+ return cgroup_base_func_proto(func_id, prog);
+ }
+}
+
+static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ const int size_default = sizeof(__u32);
+
+ if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
+ return false;
+
+ switch (off) {
+ case offsetof(struct bpf_sysctl, write):
+ if (type != BPF_READ)
+ return false;
+ bpf_ctx_record_field_size(info, size_default);
+ return bpf_ctx_narrow_access_ok(off, size, size_default);