summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-12-20 17:31:36 -0800
committerDavid S. Miller <davem@davemloft.net>2018-12-20 17:31:36 -0800
commit339bbff2d6e005a5586adeffc3d69a0eea50a764 (patch)
treea5bedd1933215aa69acdb5dbbfcbafb29561fe3c /net
parente770454fabde2e0f8fb3e5039a2b6df8f128bc9b (diff)
parent1cf4a0ccc506b5c027afc5eaf3fddc83f96f31e7 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2018-12-21 The following pull-request contains BPF updates for your *net-next* tree. There is a merge conflict in test_verifier.c. Result looks as follows: [...] }, { "calls: cross frame pruning", .insns = { [...] .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, .errstr_unpriv = "function calls to other bpf functions are allowed for root only", .result_unpriv = REJECT, .errstr = "!read_ok", .result = REJECT, }, { "jset: functional", .insns = { [...] { "jset: unknown const compare not taken", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, .errstr_unpriv = "!read_ok", .result_unpriv = REJECT, .errstr = "!read_ok", .result = REJECT, }, [...] { "jset: range", .insns = { [...] }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, .result_unpriv = ACCEPT, .result = ACCEPT, }, The main changes are: 1) Various BTF related improvements in order to get line info working. Meaning, verifier will now annotate the corresponding BPF C code to the error log, from Martin and Yonghong. 2) Implement support for raw BPF tracepoints in modules, from Matt. 3) Add several improvements to verifier state logic, namely speeding up stacksafe check, optimizations for stack state equivalence test and safety checks for liveness analysis, from Alexei. 4) Teach verifier to make use of BPF_JSET instruction, add several test cases to kselftests and remove nfp specific JSET optimization now that verifier has awareness, from Jakub. 5) Improve BPF verifier's slot_type marking logic in order to allow more stack slot sharing, from Jiong. 6) Add sk_msg->size member for context access and add set of fixes and improvements to make sock_map with kTLS usable with openssl based applications, from John. 7) Several cleanups and documentation updates in bpftool as well as auto-mount of tracefs for "bpftool prog tracelog" command, from Quentin. 8) Include sub-program tags from now on in bpf_prog_info in order to have a reliable way for user space to get all tags of the program e.g. needed for kallsyms correlation, from Song. 9) Add BTF annotations for cgroup_local_storage BPF maps and implement bpf fs pretty print support, from Roman. 10) Fix bpftool in order to allow for cross-compilation, from Ivan. 11) Update of bpftool license to GPLv2-only + BSD-2-Clause in order to be compatible with libbfd and allow for Debian packaging, from Jakub. 12) Remove an obsolete prog->aux sanitation in dump and get rid of version check for prog load, from Daniel. 13) Fix a memory leak in libbpf's line info handling, from Prashant. 14) Fix cpumap's frame alignment for build_skb() so that skb_shared_info does not get unaligned, from Jesper. 15) Fix test_progs kselftest to work with older compilers which are less smart in optimizing (and thus throwing build error), from Stanislav. 16) Cleanup and simplify AF_XDP socket teardown, from Björn. 17) Fix sk lookup in BPF kselftest's test_sock_addr with regards to netns_id argument, from Andrey. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/core/filter.c30
-rw-r--r--net/core/skmsg.c23
-rw-r--r--net/ipv4/tcp_bpf.c15
-rw-r--r--net/tls/tls_main.c14
-rw-r--r--net/tls/tls_sw.c43
-rw-r--r--net/xdp/xsk.c16
6 files changed, 99 insertions, 42 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index f9348806e843..447dd1bad31f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6313,6 +6313,9 @@ static bool sk_msg_is_valid_access(int off, int size,
if (type == BPF_WRITE)
return false;
+ if (off % size != 0)
+ return false;
+
switch (off) {
case offsetof(struct sk_msg_md, data):
info->reg_type = PTR_TO_PACKET;
@@ -6324,16 +6327,20 @@ static bool sk_msg_is_valid_access(int off, int size,
if (size != sizeof(__u64))
return false;
break;
- default:
+ case bpf_ctx_range(struct sk_msg_md, family):
+ case bpf_ctx_range(struct sk_msg_md, remote_ip4):
+ case bpf_ctx_range(struct sk_msg_md, local_ip4):
+ case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]):
+ case bpf_ctx_range_till(struct sk_msg_md, local_ip6[0], local_ip6[3]):
+ case bpf_ctx_range(struct sk_msg_md, remote_port):
+ case bpf_ctx_range(struct sk_msg_md, local_port):
+ case bpf_ctx_range(struct sk_msg_md, size):
if (size != sizeof(__u32))
return false;
- }
-
- if (off < 0 || off >= sizeof(struct sk_msg_md))
- return false;
- if (off % size != 0)
+ break;
+ default:
return false;
-
+ }
return true;
}
@@ -7418,6 +7425,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
int off;
#endif
+ /* convert ctx uses the fact sg element is first in struct */
+ BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0);
+
switch (si->off) {
case offsetof(struct sk_msg_md, data):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
@@ -7530,6 +7540,12 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
offsetof(struct sock_common, skc_num));
break;
+
+ case offsetof(struct sk_msg_md, size):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_sg, size));
+ break;
}
return insn - insn_buf;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 56a99d0c9aa0..86c9726fced8 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -403,7 +403,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
msg->skb = skb;
sk_psock_queue_msg(psock, msg);
- sk->sk_data_ready(sk);
+ sk_psock_data_ready(sk, psock);
return copied;
}
@@ -572,6 +572,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
rcu_assign_sk_user_data(sk, NULL);
sk_psock_cork_free(psock);
+ sk_psock_zap_ingress(psock);
sk_psock_restore_proto(sk, psock);
write_lock_bh(&sk->sk_callback_lock);
@@ -669,6 +670,22 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
bool ingress;
switch (verdict) {
+ case __SK_PASS:
+ sk_other = psock->sk;
+ if (sock_flag(sk_other, SOCK_DEAD) ||
+ !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+ goto out_free;
+ }
+ if (atomic_read(&sk_other->sk_rmem_alloc) <=
+ sk_other->sk_rcvbuf) {
+ struct tcp_skb_cb *tcp = TCP_SKB_CB(skb);
+
+ tcp->bpf.flags |= BPF_F_INGRESS;
+ skb_queue_tail(&psock->ingress_skb, skb);
+ schedule_work(&psock->work);
+ break;
+ }
+ goto out_free;
case __SK_REDIRECT:
sk_other = tcp_skb_bpf_redirect_fetch(skb);
if (unlikely(!sk_other))
@@ -735,7 +752,7 @@ static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
}
/* Called with socket lock held. */
-static void sk_psock_data_ready(struct sock *sk)
+static void sk_psock_strp_data_ready(struct sock *sk)
{
struct sk_psock *psock;
@@ -783,7 +800,7 @@ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
return;
parser->saved_data_ready = sk->sk_data_ready;
- sk->sk_data_ready = sk_psock_data_ready;
+ sk->sk_data_ready = sk_psock_strp_data_ready;
sk->sk_write_space = sk_psock_write_space;
parser->enabled = true;
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index a47c1cdf90fc..1bb7321a256d 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -8,6 +8,7 @@
#include <linux/wait.h>
#include <net/inet_common.h>
+#include <net/tls.h>
static bool tcp_bpf_stream_read(const struct sock *sk)
{
@@ -198,7 +199,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
msg->sg.start = i;
msg->sg.size -= apply_bytes;
sk_psock_queue_msg(psock, tmp);
- sk->sk_data_ready(sk);
+ sk_psock_data_ready(sk, psock);
} else {
sk_msg_free(sk, tmp);
kfree(tmp);
@@ -218,6 +219,8 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
u32 off;
while (1) {
+ bool has_tx_ulp;
+
sge = sk_msg_elem(msg, msg->sg.start);
size = (apply && apply_bytes < sge->length) ?
apply_bytes : sge->length;
@@ -226,7 +229,15 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
tcp_rate_check_app_limited(sk);
retry:
- ret = do_tcp_sendpages(sk, page, off, size, flags);
+ has_tx_ulp = tls_sw_has_ctx_tx(sk);
+ if (has_tx_ulp) {
+ flags |= MSG_SENDPAGE_NOPOLICY;
+ ret = kernel_sendpage_locked(sk,
+ page, off, size, flags);
+ } else {
+ ret = do_tcp_sendpages(sk, page, off, size, flags);
+ }
+
if (ret <= 0)
return ret;
if (apply)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 28887cf628b8..78cb4a584080 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -55,6 +55,8 @@ enum {
static struct proto *saved_tcpv6_prot;
static DEFINE_MUTEX(tcpv6_prot_mutex);
+static struct proto *saved_tcpv4_prot;
+static DEFINE_MUTEX(tcpv4_prot_mutex);
static LIST_HEAD(device_list);
static DEFINE_SPINLOCK(device_spinlock);
static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
@@ -700,6 +702,16 @@ static int tls_init(struct sock *sk)
mutex_unlock(&tcpv6_prot_mutex);
}
+ if (ip_ver == TLSV4 &&
+ unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv4_prot))) {
+ mutex_lock(&tcpv4_prot_mutex);
+ if (likely(sk->sk_prot != saved_tcpv4_prot)) {
+ build_protos(tls_prots[TLSV4], sk->sk_prot);
+ smp_store_release(&saved_tcpv4_prot, sk->sk_prot);
+ }
+ mutex_unlock(&tcpv4_prot_mutex);
+ }
+
ctx->tx_conf = TLS_BASE;
ctx->rx_conf = TLS_BASE;
update_sk_prot(sk, ctx);
@@ -731,8 +743,6 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
static int __init tls_register(void)
{
- build_protos(tls_prots[TLSV4], &tcp_prot);
-
tls_sw_proto_ops = inet_stream_ops;
tls_sw_proto_ops.splice_read = tls_sw_splice_read;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index d4ecc66464e6..5aee9ae5ca53 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -686,12 +686,13 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
struct sk_psock *psock;
struct sock *sk_redir;
struct tls_rec *rec;
+ bool enospc, policy;
int err = 0, send;
u32 delta = 0;
- bool enospc;
+ policy = !(flags & MSG_SENDPAGE_NOPOLICY);
psock = sk_psock_get(sk);
- if (!psock)
+ if (!psock || !policy)
return tls_push_record(sk, flags, record_type);
more_data:
enospc = sk_msg_full(msg);
@@ -1017,8 +1018,8 @@ send_end:
return copied ? copied : ret;
}
-int tls_sw_sendpage(struct sock *sk, struct page *page,
- int offset, size_t size, int flags)
+int tls_sw_do_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
{
long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -1033,15 +1034,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
int ret = 0;
bool eor;
- if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
- MSG_SENDPAGE_NOTLAST))
- return -ENOTSUPP;
-
- /* No MSG_EOR from splice, only look at MSG_MORE */
eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
-
- lock_sock(sk);
-
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
/* Wait till there is any pending write on socket */
@@ -1145,10 +1138,34 @@ wait_for_memory:
}
sendpage_end:
ret = sk_stream_error(sk, flags, ret);
- release_sock(sk);
return copied ? copied : ret;
}
+int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+ return -ENOTSUPP;
+
+ return tls_sw_do_sendpage(sk, page, offset, size, flags);
+}
+
+int tls_sw_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ int ret;
+
+ if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+ return -ENOTSUPP;
+
+ lock_sock(sk);
+ ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
+ release_sock(sk);
+ return ret;
+}
+
static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
int flags, long timeo, int *err)
{
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 07156f43d295..a03268454a27 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -366,6 +366,7 @@ static int xsk_release(struct socket *sock)
xskq_destroy(xs->rx);
xskq_destroy(xs->tx);
+ xdp_put_umem(xs->umem);
sock_orphan(sk);
sock->sk = NULL;
@@ -713,18 +714,6 @@ static const struct proto_ops xsk_proto_ops = {
.sendpage = sock_no_sendpage,
};
-static void xsk_destruct(struct sock *sk)
-{
- struct xdp_sock *xs = xdp_sk(sk);
-
- if (!sock_flag(sk, SOCK_DEAD))
- return;
-
- xdp_put_umem(xs->umem);
-
- sk_refcnt_debug_dec(sk);
-}
-
static int xsk_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
@@ -751,9 +740,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
sk->sk_family = PF_XDP;
- sk->sk_destruct = xsk_destruct;
- sk_refcnt_debug_inc(sk);
-
sock_set_flag(sk, SOCK_RCU_FREE);
xs = xdp_sk(sk);