author	David S. Miller <davem@davemloft.net>	2020-03-13 20:52:03 -0700
committer	David S. Miller <davem@davemloft.net>	2020-03-13 20:52:03 -0700
commit	44ef976ab3c4ccd6c886714e5349caa53c477010 (patch)
tree	fad7059aad1e1ac040e59a2f4870400bc8e9e30a /net
parent	48f5d5cb80b4e414cb97dd7dba43b0370bdee130 (diff)
parent	832165d225f71040a2c1fc2407752e462d00de1f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
pull-request: bpf-next 2020-03-13

The following pull-request contains BPF updates for your *net-next* tree.

We've added 86 non-merge commits during the last 12 day(s) which contain
a total of 107 files changed, 5771 insertions(+), 1700 deletions(-).

The main changes are:

1) Add modify_return attach type which allows to attach to a function via
   BPF trampoline and is run after the fentry and before the fexit programs
   and can pass a return code to the original caller, from KP Singh.

2) Generalize BPF's kallsyms handling and add BPF trampoline and dispatcher
   objects to be visible in /proc/kallsyms so they can be annotated in
   stack traces, from Jiri Olsa.

3) Extend BPF sockmap to allow for UDP next to existing TCP support in order
   to enable this for BPF based socket dispatch, from Lorenz Bauer.

4) Introduce a new bpftool 'prog profile' command which attaches to existing
   BPF programs via fentry and fexit hooks and reads out hardware counters
   during that period, from Song Liu. Example usage:

   # bpftool prog profile id 337 duration 3 cycles instructions llc_misses

         4228 run_cnt
      3403698 cycles                                              (84.08%)
      3525294 instructions   #  1.04 insn per cycle               (84.05%)
           13 llc_misses     #  3.69 LLC misses per million isns  (83.50%)

5) Batch of improvements to libbpf, bpftool and BPF selftests. Also addition
   of a new bpf_link abstraction to keep in particular BPF tracing programs
   attached even when the application owning them exits, from Andrii Nakryiko.

6) New bpf_get_ns_current_pid_tgid() helper for tracing to perform PID
   filtering and which returns the PID as seen by the init namespace, from
   Carlos Neira.

7) Refactor of RISC-V JIT code to move out common pieces and addition of a
   new RV32G BPF JIT compiler, from Luke Nelson.

8) Add gso_size context member to __sk_buff in order to be able to know
   whether a given skb is GSO or not, from Willem de Bruijn.

9) Add a new bpf_xdp_output() helper which reuses XDP's existing perf RB
   output implementation but can be called from tracepoint programs, from
   Eelco Chaudron.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
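[Reader's note, not part of the pull request: a minimal sketch of a
modify_return program in BPF C, modeled on the kernel selftests for the
bpf_modify_return_test() function added in net/bpf/test_run.c below. The
SEC()/BPF_PROG() conventions assume libbpf's bpf_tracing.h.]

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("fmod_ret/bpf_modify_return_test")
int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
{
	/* Runs after fentry and before fexit programs. Returning a
	 * non-zero value here skips the traced function's body and
	 * hands that value back to the original caller; returning 0
	 * lets the function run normally.
	 */
	return 0;
}

char _license[] SEC("license") = "GPL";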
Diffstat (limited to 'net')
-rw-r--r--	net/bpf/test_run.c	64
-rw-r--r--	net/core/filter.c	65
-rw-r--r--	net/core/sock_map.c	157
-rw-r--r--	net/ipv4/Makefile	1
-rw-r--r--	net/ipv4/tcp_bpf.c	114
-rw-r--r--	net/ipv4/tcp_ulp.c	7
-rw-r--r--	net/ipv4/udp_bpf.c	53
7 files changed, 301 insertions(+), 160 deletions(-)
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 562443f94133..4c921f5154e0 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,6 +10,7 @@
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <net/tcp.h>
+#include <linux/error-injection.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
@@ -143,6 +144,14 @@ int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
return a + (long)b + c + d + (long)e + f;
}
+int noinline bpf_modify_return_test(int a, int *b)
+{
+ *b += 1;
+ return a + *b;
+}
+
+ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
+
static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
u32 headroom, u32 tailroom)
{
@@ -160,18 +169,48 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
kfree(data);
return ERR_PTR(-EFAULT);
}
- if (bpf_fentry_test1(1) != 2 ||
- bpf_fentry_test2(2, 3) != 5 ||
- bpf_fentry_test3(4, 5, 6) != 15 ||
- bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
- bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
- bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) {
- kfree(data);
- return ERR_PTR(-EFAULT);
- }
+
return data;
}
+int bpf_prog_test_run_tracing(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ u16 side_effect = 0, ret = 0;
+ int b = 2, err = -EFAULT;
+ u32 retval = 0;
+
+ switch (prog->expected_attach_type) {
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ if (bpf_fentry_test1(1) != 2 ||
+ bpf_fentry_test2(2, 3) != 5 ||
+ bpf_fentry_test3(4, 5, 6) != 15 ||
+ bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
+ bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
+ bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111)
+ goto out;
+ break;
+ case BPF_MODIFY_RETURN:
+ ret = bpf_modify_return_test(1, &b);
+ if (b != 2)
+ side_effect = 1;
+ break;
+ default:
+ goto out;
+ }
+
+ retval = ((u32)side_effect << 16) | ret;
+ if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
+ goto out;
+
+ err = 0;
+out:
+ trace_bpf_test_finish(&err);
+ return err;
+}
+
static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
{
void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
@@ -277,6 +316,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
/* gso_segs is allowed */
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs),
+ offsetof(struct __sk_buff, gso_size)))
+ return -EINVAL;
+
+ /* gso_size is allowed */
+
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size),
sizeof(struct __sk_buff)))
return -EINVAL;
@@ -297,6 +342,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
if (__skb->gso_segs > GSO_MAX_SEGS)
return -EINVAL;
skb_shinfo(skb)->gso_segs = __skb->gso_segs;
+ skb_shinfo(skb)->gso_size = __skb->gso_size;
return 0;
}
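[Reader's note: bpf_prog_test_run_tracing() above packs its result as
((u32)side_effect << 16) | ret before copying it to uattr->test.retval.
A hedged user-space sketch of decoding that value; the function name is
illustrative.]

#include <stdint.h>
#include <stdio.h>

static void decode_test_retval(uint32_t retval)
{
	uint16_t side_effect = retval >> 16; /* 1 if *b was modified */
	uint16_t ret = retval & 0xffff;      /* bpf_modify_return_test() result */

	printf("side_effect=%u ret=%u\n", side_effect, ret);
}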
diff --git a/net/core/filter.c b/net/core/filter.c
index 4a08c9fb2be7..96350a743539 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4061,7 +4061,8 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
- if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+ if (unlikely(!xdp ||
+ xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
return -EFAULT;
return bpf_event_output(map, flags, meta, meta_size, xdp->data,
@@ -4079,6 +4080,19 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
+static int bpf_xdp_output_btf_ids[5];
+const struct bpf_func_proto bpf_xdp_output_proto = {
+ .func = bpf_xdp_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ .btf_id = bpf_xdp_output_btf_ids,
+};
+
BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
{
return skb->sk ? sock_gen_cookie(skb->sk) : 0;
@@ -7139,6 +7153,27 @@ static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
+ struct bpf_insn *insn)
+{
+ /* si->dst_reg = skb_shinfo(SKB); */
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
+ BPF_REG_AX, si->src_reg,
+ offsetof(struct sk_buff, end));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, head));
+ *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+#else
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, end));
+#endif
+
+ return insn;
+}
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -7461,26 +7496,21 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, gso_segs):
- /* si->dst_reg = skb_shinfo(SKB); */
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
- BPF_REG_AX, si->src_reg,
- offsetof(struct sk_buff, end));
- *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
- si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, head));
- *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
-#else
- *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
- si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, end));
-#endif
+ insn = bpf_convert_shinfo_access(si, insn);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
si->dst_reg, si->dst_reg,
bpf_target_off(struct skb_shared_info,
gso_segs, 2,
target_size));
break;
+ case offsetof(struct __sk_buff, gso_size):
+ insn = bpf_convert_shinfo_access(si, insn);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size),
+ si->dst_reg, si->dst_reg,
+ bpf_target_off(struct skb_shared_info,
+ gso_size, 2,
+ target_size));
+ break;
case offsetof(struct __sk_buff, wire_len):
BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4);
@@ -8829,10 +8859,9 @@ const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
#endif /* CONFIG_INET */
-DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp)
+DEFINE_BPF_DISPATCHER(xdp)
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
{
- bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp),
- prev_prog, prog);
+ bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
}
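[Reader's note: with the gso_size ctx access added above, skb-based
programs can read the new __sk_buff field directly. A hedged sketch of a
TC classifier using it; the program name is illustrative.]

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("classifier")
int gso_probe(struct __sk_buff *skb)
{
	/* gso_size is non-zero only for GSO skbs */
	if (skb->gso_size)
		bpf_printk("GSO skb, mss %u\n", skb->gso_size);
	return 0; /* TC_ACT_OK */
}

char _license[] SEC("license") = "GPL";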
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 2e0f465295c3..a7075b3b4489 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/sock_diag.h>
+#include <net/udp.h>
struct bpf_stab {
struct bpf_map map;
@@ -141,12 +142,58 @@ static void sock_map_unref(struct sock *sk, void *link_raw)
}
}
+static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
+{
+ struct proto *prot;
+
+ sock_owned_by_me(sk);
+
+ switch (sk->sk_type) {
+ case SOCK_STREAM:
+ prot = tcp_bpf_get_proto(sk, psock);
+ break;
+
+ case SOCK_DGRAM:
+ prot = udp_bpf_get_proto(sk, psock);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (IS_ERR(prot))
+ return PTR_ERR(prot);
+
+ sk_psock_update_proto(sk, psock, prot);
+ return 0;
+}
+
+static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
+{
+ struct sk_psock *psock;
+
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ if (psock) {
+ if (sk->sk_prot->close != sock_map_close) {
+ psock = ERR_PTR(-EBUSY);
+ goto out;
+ }
+
+ if (!refcount_inc_not_zero(&psock->refcnt))
+ psock = ERR_PTR(-EBUSY);
+ }
+out:
+ rcu_read_unlock();
+ return psock;
+}
+
static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
struct sock *sk)
{
struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
- bool skb_progs, sk_psock_is_new = false;
struct sk_psock *psock;
+ bool skb_progs;
int ret;
skb_verdict = READ_ONCE(progs->skb_verdict);
@@ -172,7 +219,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
}
}
- psock = sk_psock_get_checked(sk);
+ psock = sock_map_psock_get_checked(sk);
if (IS_ERR(psock)) {
ret = PTR_ERR(psock);
goto out_progs;
@@ -191,18 +238,14 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
ret = -ENOMEM;
goto out_progs;
}
- sk_psock_is_new = true;
}
if (msg_parser)
psock_set_prog(&psock->progs.msg_parser, msg_parser);
- if (sk_psock_is_new) {
- ret = tcp_bpf_init(sk);
- if (ret < 0)
- goto out_drop;
- } else {
- tcp_bpf_reinit(sk);
- }
+
+ ret = sock_map_init_proto(sk, psock);
+ if (ret < 0)
+ goto out_drop;
write_lock_bh(&sk->sk_callback_lock);
if (skb_progs && !psock->parser.enabled) {
@@ -235,20 +278,17 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
struct sk_psock *psock;
int ret;
- psock = sk_psock_get_checked(sk);
+ psock = sock_map_psock_get_checked(sk);
if (IS_ERR(psock))
return PTR_ERR(psock);
- if (psock) {
- tcp_bpf_reinit(sk);
- return 0;
+ if (!psock) {
+ psock = sk_psock_init(sk, map->numa_node);
+ if (!psock)
+ return -ENOMEM;
}
- psock = sk_psock_init(sk, map->numa_node);
- if (!psock)
- return -ENOMEM;
-
- ret = tcp_bpf_init(sk);
+ ret = sock_map_init_proto(sk, psock);
if (ret < 0)
sk_psock_put(sk, psock);
return ret;
@@ -384,7 +424,6 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
struct sock *sk, u64 flags)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
- struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_psock_link *link;
struct sk_psock *psock;
struct sock *osk;
@@ -395,7 +434,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
return -EINVAL;
if (unlikely(idx >= map->max_entries))
return -E2BIG;
- if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data)))
+ if (inet_csk_has_ulp(sk))
return -EINVAL;
link = sk_psock_init_link();
@@ -448,15 +487,31 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
}
-static bool sock_map_sk_is_suitable(const struct sock *sk)
+static bool sk_is_tcp(const struct sock *sk)
{
return sk->sk_type == SOCK_STREAM &&
sk->sk_protocol == IPPROTO_TCP;
}
+static bool sk_is_udp(const struct sock *sk)
+{
+ return sk->sk_type == SOCK_DGRAM &&
+ sk->sk_protocol == IPPROTO_UDP;
+}
+
+static bool sock_map_sk_is_suitable(const struct sock *sk)
+{
+ return sk_is_tcp(sk) || sk_is_udp(sk);
+}
+
static bool sock_map_sk_state_allowed(const struct sock *sk)
{
- return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
+ if (sk_is_tcp(sk))
+ return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
+ else if (sk_is_udp(sk))
+ return sk_hashed(sk);
+
+ return false;
}
static int sock_map_update_elem(struct bpf_map *map, void *key,
@@ -738,7 +793,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
struct sock *sk, u64 flags)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct inet_connection_sock *icsk = inet_csk(sk);
u32 key_size = map->key_size, hash;
struct bpf_htab_elem *elem, *elem_new;
struct bpf_htab_bucket *bucket;
@@ -749,7 +803,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
WARN_ON_ONCE(!rcu_read_lock_held());
if (unlikely(flags > BPF_EXIST))
return -EINVAL;
- if (unlikely(icsk->icsk_ulp_data))
+ if (inet_csk_has_ulp(sk))
return -EINVAL;
link = sk_psock_init_link();
@@ -1129,7 +1183,7 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
return 0;
}
-void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link)
+static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
{
switch (link->map->map_type) {
case BPF_MAP_TYPE_SOCKMAP:
@@ -1142,3 +1196,54 @@ void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link)
break;
}
}
+
+static void sock_map_remove_links(struct sock *sk, struct sk_psock *psock)
+{
+ struct sk_psock_link *link;
+
+ while ((link = sk_psock_link_pop(psock))) {
+ sock_map_unlink(sk, link);
+ sk_psock_free_link(link);
+ }
+}
+
+void sock_map_unhash(struct sock *sk)
+{
+ void (*saved_unhash)(struct sock *sk);
+ struct sk_psock *psock;
+
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ if (sk->sk_prot->unhash)
+ sk->sk_prot->unhash(sk);
+ return;
+ }
+
+ saved_unhash = psock->saved_unhash;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ saved_unhash(sk);
+}
+
+void sock_map_close(struct sock *sk, long timeout)
+{
+ void (*saved_close)(struct sock *sk, long timeout);
+ struct sk_psock *psock;
+
+ lock_sock(sk);
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ release_sock(sk);
+ return sk->sk_prot->close(sk, timeout);
+ }
+
+ saved_close = psock->saved_close;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ release_sock(sk);
+ saved_close(sk, timeout);
+}
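[Reader's note: per the sock_map_sk_state_allowed() change above, a UDP
socket can now be added to a sockmap once it is hashed (bound/connected).
A hedged user-space sketch; the map (created with 8-byte values) and the
socket are assumed to exist already.]

#include <linux/types.h>
#include <bpf/bpf.h>

static int sock_map_add_udp(int map_fd, int udp_fd)
{
	__u32 key = 0;         /* illustrative slot */
	__u64 value = udp_fd;  /* sockmap values are socket fds */

	/* The update is rejected if the socket is not yet hashed,
	 * per sock_map_sk_state_allowed().
	 */
	return bpf_map_update_elem(map_fd, &key, &value, BPF_ANY);
}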
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 9d97bace13c8..9e1a186a3671 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
+obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 7d6e1b75d4d4..fe7b4fbc31c1 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -528,57 +528,7 @@ out_err:
return copied ? copied : err;
}
-static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock)
-{
- struct sk_psock_link *link;
-
- while ((link = sk_psock_link_pop(psock))) {
- sk_psock_unlink(sk, link);
- sk_psock_free_link(link);
- }
-}
-
-static void tcp_bpf_unhash(struct sock *sk)
-{
- void (*saved_unhash)(struct sock *sk);
- struct sk_psock *psock;
-
- rcu_read_lock();
- psock = sk_psock(sk);
- if (unlikely(!psock)) {
- rcu_read_unlock();
- if (sk->sk_prot->unhash)
- sk->sk_prot->unhash(sk);
- return;
- }
-
- saved_unhash = psock->saved_unhash;
- tcp_bpf_remove(sk, psock);
- rcu_read_unlock();
- saved_unhash(sk);
-}
-
-static void tcp_bpf_close(struct sock *sk, long timeout)
-{
- void (*saved_close)(struct sock *sk, long timeout);
- struct sk_psock *psock;
-
- lock_sock(sk);
- rcu_read_lock();
- psock = sk_psock(sk);
- if (unlikely(!psock)) {
- rcu_read_unlock();
- release_sock(sk);
- return sk->sk_prot->close(sk, timeout);
- }
-
- saved_close = psock->saved_close;
- tcp_bpf_remove(sk, psock);
- rcu_read_unlock();
- release_sock(sk);
- saved_close(sk, timeout);
-}
-
+#ifdef CONFIG_BPF_STREAM_PARSER
enum {
TCP_BPF_IPV4,
TCP_BPF_IPV6,
@@ -599,8 +549,8 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
struct proto *base)
{
prot[TCP_BPF_BASE] = *base;
- prot[TCP_BPF_BASE].unhash = tcp_bpf_unhash;
- prot[TCP_BPF_BASE].close = tcp_bpf_close;
+ prot[TCP_BPF_BASE].unhash = sock_map_unhash;
+ prot[TCP_BPF_BASE].close = sock_map_close;
prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read;
@@ -629,28 +579,6 @@ static int __init tcp_bpf_v4_build_proto(void)
}
core_initcall(tcp_bpf_v4_build_proto);
-static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock)
-{
- int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
- int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
-
- sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]);
-}
-
-static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
-{
- int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
- int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
-
- /* Reinit occurs when program types change e.g. TCP_BPF_TX is removed
- * or added requiring sk_prot hook updates. We keep original saved
- * hooks in this case.
- *
- * Pairs with lockless read in sk_clone_lock().
- */
- WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]);
-}
-
static int tcp_bpf_assert_proto_ops(struct proto *ops)
{
/* In order to avoid retpoline, we make assumptions when we call
@@ -662,36 +590,21 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops)
ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
}
-void tcp_bpf_reinit(struct sock *sk)
+struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
{
- struct sk_psock *psock;
-
- sock_owned_by_me(sk);
-
- rcu_read_lock();
- psock = sk_psock(sk);
- tcp_bpf_reinit_sk_prot(sk, psock);
- rcu_read_unlock();
-}
+ int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
+ int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
-int tcp_bpf_init(struct sock *sk)
-{
- struct proto *ops = READ_ONCE(sk->sk_prot);
- struct sk_psock *psock;
+ if (!psock->sk_proto) {
+ struct proto *ops = READ_ONCE(sk->sk_prot);
- sock_owned_by_me(sk);
+ if (tcp_bpf_assert_proto_ops(ops))
+ return ERR_PTR(-EINVAL);
- rcu_read_lock();
- psock = sk_psock(sk);
- if (unlikely(!psock || psock->sk_proto ||
- tcp_bpf_assert_proto_ops(ops))) {
- rcu_read_unlock();
- return -EINVAL;
+ tcp_bpf_check_v6_needs_rebuild(sk, ops);
}
- tcp_bpf_check_v6_needs_rebuild(sk, ops);
- tcp_bpf_update_sk_prot(sk, psock);
- rcu_read_unlock();
- return 0;
+
+ return &tcp_bpf_prots[family][config];
}
/* If a child got cloned from a listening socket that had tcp_bpf
@@ -707,3 +620,4 @@ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
newsk->sk_prot = sk->sk_prot_creator;
}
+#endif /* CONFIG_BPF_STREAM_PARSER */
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 2703f24c5d1a..7c27aa629af1 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -105,13 +105,6 @@ void tcp_update_ulp(struct sock *sk, struct proto *proto,
{
struct inet_connection_sock *icsk = inet_csk(sk);
- if (!icsk->icsk_ulp_ops) {
- sk->sk_write_space = write_space;
- /* Pairs with lockless read in sk_clone_lock() */
- WRITE_ONCE(sk->sk_prot, proto);
- return;
- }
-
if (icsk->icsk_ulp_ops->update)
icsk->icsk_ulp_ops->update(sk, proto, write_space);
}
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
new file mode 100644
index 000000000000..eddd973e6575
--- /dev/null
+++ b/net/ipv4/udp_bpf.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Cloudflare Ltd https://cloudflare.com */
+
+#include <linux/skmsg.h>
+#include <net/sock.h>
+#include <net/udp.h>
+
+enum {
+ UDP_BPF_IPV4,
+ UDP_BPF_IPV6,
+ UDP_BPF_NUM_PROTS,
+};
+
+static struct proto *udpv6_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(udpv6_prot_lock);
+static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS];
+
+static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
+{
+ *prot = *base;
+ prot->unhash = sock_map_unhash;
+ prot->close = sock_map_close;
+}
+
+static void udp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
+{
+ if (sk->sk_family == AF_INET6 &&
+ unlikely(ops != smp_load_acquire(&udpv6_prot_saved))) {
+ spin_lock_bh(&udpv6_prot_lock);
+ if (likely(ops != udpv6_prot_saved)) {
+ udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV6], ops);
+ smp_store_release(&udpv6_prot_saved, ops);
+ }
+ spin_unlock_bh(&udpv6_prot_lock);
+ }
+}
+
+static int __init udp_bpf_v4_build_proto(void)
+{
+ udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot);
+ return 0;
+}
+core_initcall(udp_bpf_v4_build_proto);
+
+struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
+{
+ int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
+
+ if (!psock->sk_proto)
+ udp_bpf_check_v6_needs_rebuild(sk, READ_ONCE(sk->sk_prot));
+
+ return &udp_bpf_prots[family];
+}