From 600c70bad6594cb124c641ed05355ca134650ea4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 1 Jul 2019 10:38:39 -0700 Subject: bpf: allow wide (u64) aligned stores for some fields of bpf_sock_addr Since commit cd17d7770578 ("bpf/tools: sync bpf.h") clang decided that it can do a single u64 store into user_ip6[2] instead of two separate u32 ones: # 17: (18) r2 = 0x100000000000000 # ; ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2); # 19: (7b) *(u64 *)(r1 +16) = r2 # invalid bpf_context access off=16 size=8 >From the compiler point of view it does look like a correct thing to do, so let's support it on the kernel side. Credit to Andrii Nakryiko for a proper implementation of bpf_ctx_wide_store_ok. Cc: Andrii Nakryiko Cc: Yonghong Song Fixes: cd17d7770578 ("bpf/tools: sync bpf.h") Reported-by: kernel test robot Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- net/core/filter.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 089aaea0ccc6..4481e950f020 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6890,6 +6890,16 @@ static bool sock_addr_is_valid_access(int off, int size, if (!bpf_ctx_narrow_access_ok(off, size, size_default)) return false; } else { + if (bpf_ctx_wide_store_ok(off, size, + struct bpf_sock_addr, + user_ip6)) + return true; + + if (bpf_ctx_wide_store_ok(off, size, + struct bpf_sock_addr, + msg_src_ip6)) + return true; + if (size != size_default) return false; } @@ -7730,9 +7740,6 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation. * - * It doesn't support SIZE argument though since narrow stores are not - * supported for now. - * * In addition it uses Temporary Field TF (member of struct S) as the 3rd * "register" since two registers available in convert_ctx_access are not * enough: we can't override neither SRC, since it contains value to store, nor @@ -7740,7 +7747,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, * instructions. But we need a temporary place to save pointer to nested * structure whose field we want to store to. */ -#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \ +#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \ do { \ int tmp_reg = BPF_REG_9; \ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ @@ -7751,8 +7758,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, offsetof(S, TF)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \ si->dst_reg, offsetof(S, F)); \ - *insn++ = BPF_STX_MEM( \ - BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \ + *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \ target_size) \ + OFF); \ @@ -7764,8 +7770,8 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, TF) \ do { \ if (type == BPF_WRITE) { \ - SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \ - TF); \ + SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \ + OFF, TF); \ } else { \ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \ S, NS, F, NF, SIZE, OFF); \ -- cgit v1.2.3 From bf0bdd1343efbbf65b4d53aef1fce14acbd79d50 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 3 Jul 2019 15:09:16 +0300 Subject: xdp: fix race on generic receive path Unlike driver mode, generic xdp receive could be triggered by different threads on different CPU cores at the same time leading to the fill and rx queue breakage. For example, this could happen while sending packets from two processes to the first interface of veth pair while the second part of it is open with AF_XDP socket. Need to take a lock for each generic receive to avoid race. Fixes: c497176cb2e4 ("xsk: add Rx receive functions and poll support") Signed-off-by: Ilya Maximets Acked-by: Magnus Karlsson Tested-by: William Tu Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 74417a851ed5..0574f008954c 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -129,13 +129,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) u64 addr; int err; - if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) - return -EINVAL; + spin_lock_bh(&xs->rx_lock); + + if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) { + err = -EINVAL; + goto out_unlock; + } if (!xskq_peek_addr(xs->umem->fq, &addr) || len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { - xs->rx_dropped++; - return -ENOSPC; + err = -ENOSPC; + goto out_drop; } addr += xs->umem->headroom; @@ -144,13 +148,21 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) memcpy(buffer, xdp->data_meta, len + metalen); addr += metalen; err = xskq_produce_batch_desc(xs->rx, addr, len); - if (!err) { - xskq_discard_addr(xs->umem->fq); - xsk_flush(xs); - return 0; - } + if (err) + goto out_drop; + + xskq_discard_addr(xs->umem->fq); + xskq_produce_flush_desc(xs->rx); + spin_unlock_bh(&xs->rx_lock); + + xs->sk.sk_data_ready(&xs->sk); + return 0; + +out_drop: xs->rx_dropped++; +out_unlock: + spin_unlock_bh(&xs->rx_lock); return err; } @@ -787,6 +799,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, xs = xdp_sk(sk); mutex_init(&xs->mutex); + spin_lock_init(&xs->rx_lock); spin_lock_init(&xs->tx_completion_lock); mutex_lock(&net->xdp.lock); -- cgit v1.2.3