summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-12-28 15:20:48 -0800
committerDavid S. Miller <davem@davemloft.net>2020-12-28 15:26:11 -0800
commit4bfc4714849d005e6835bcffa3c29ebd6e5ee35d (patch)
treeeef007409f1c203165eba618cac11bd2fca65a07
parent1fef73597fa545c35fddc953979013882fbd4e55 (diff)
parenta61daaf351da7c8493f2586437617d60c24350b0 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says: ==================== pull-request: bpf 2020-12-28 The following pull-request contains BPF updates for your *net* tree. There is a small merge conflict between bpf tree commit 69ca310f3416 ("bpf: Save correct stopping point in file seq iteration") and net tree commit 66ed594409a1 ("bpf/task_iter: In task_file_seq_get_next use task_lookup_next_fd_rcu"). The get_files_struct() does not exist anymore in net, so take the hunk in HEAD and add the `info->tid = curr_tid` to the error path: [...] curr_task = task_seq_get_next(ns, &curr_tid, true); if (!curr_task) { info->task = NULL; info->tid = curr_tid; return NULL; } /* set info->task and info->tid */ [...] We've added 10 non-merge commits during the last 9 day(s) which contain a total of 11 files changed, 75 insertions(+), 20 deletions(-). The main changes are: 1) Various AF_XDP fixes such as fill/completion ring leak on failed bind and fixing a race in skb mode's backpressure mechanism, from Magnus Karlsson. 2) Fix latency spikes on lockdep enabled kernels by adding a rescheduling point to BPF hashtab initialization, from Eric Dumazet. 3) Fix a splat in task iterator by saving the correct stopping point in the seq file iteration, from Jonathan Lemon. 4) Fix BPF maps selftest by adding retries in case hashtab returns EBUSY errors on update/deletes, from Andrii Nakryiko. 5) Fix BPF selftest error reporting to something more user friendly if the vmlinux BTF cannot be found, from Kamal Mostafa. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/xdp_sock.h4
-rw-r--r--include/net/xsk_buff_pool.h5
-rw-r--r--kernel/bpf/hashtab.c1
-rw-r--r--kernel/bpf/syscall.c1
-rw-r--r--kernel/bpf/task_iter.c18
-rw-r--r--net/xdp/xsk.c16
-rw-r--r--net/xdp/xsk_buff_pool.c3
-rw-r--r--net/xdp/xsk_queue.h5
-rw-r--r--tools/testing/selftests/bpf/Makefile3
-rw-r--r--tools/testing/selftests/bpf/test_maps.c48
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.c4
11 files changed, 81 insertions, 27 deletions
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 4f4e93bf814c..cc17bc957548 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -58,10 +58,6 @@ struct xdp_sock {
struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head tx_list;
- /* Mutual exclusion of NAPI TX thread and sendmsg error paths
- * in the SKB destructor callback.
- */
- spinlock_t tx_completion_lock;
/* Protects generic receive. */
spinlock_t rx_lock;
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 01755b838c74..eaa8386dbc63 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -73,6 +73,11 @@ struct xsk_buff_pool {
bool dma_need_sync;
bool unaligned;
void *addrs;
+ /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect:
+ * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when
+ * sockets share a single cq when the same netdev and queue id is shared.
+ */
+ spinlock_t cq_lock;
struct xdp_buff_xsk *free_heads[];
};
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 7e848200cd26..c1ac7f964bc9 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -152,6 +152,7 @@ static void htab_init_buckets(struct bpf_htab *htab)
lockdep_set_class(&htab->buckets[i].lock,
&htab->lockdep_key);
}
+ cond_resched();
}
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4caf06fe4152..c3bb03c8371f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -17,7 +17,6 @@
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
-#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index e73c07593024..3efe38191d1c 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -37,7 +37,7 @@ retry:
if (!task) {
++*tid;
goto retry;
- } else if (skip_if_dup_files && task->tgid != task->pid &&
+ } else if (skip_if_dup_files && !thread_group_leader(task) &&
task->files == task->group_leader->files) {
put_task_struct(task);
task = NULL;
@@ -151,14 +151,14 @@ again:
curr_task = info->task;
curr_fd = info->fd;
} else {
- curr_task = task_seq_get_next(ns, &curr_tid, true);
- if (!curr_task) {
- info->task = NULL;
- return NULL;
- }
-
- /* set info->task and info->tid */
- info->task = curr_task;
+ curr_task = task_seq_get_next(ns, &curr_tid, true);
+ if (!curr_task) {
+ info->task = NULL;
+ info->tid = curr_tid;
+ return NULL;
+ }
+
+ /* set info->task and info->tid */
if (curr_tid == info->tid) {
curr_fd = info->fd;
} else {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ac4a317038f1..8037b04a9edd 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -423,9 +423,9 @@ static void xsk_destruct_skb(struct sk_buff *skb)
struct xdp_sock *xs = xdp_sk(skb->sk);
unsigned long flags;
- spin_lock_irqsave(&xs->tx_completion_lock, flags);
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
xskq_prod_submit_addr(xs->pool->cq, addr);
- spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
sock_wfree(skb);
}
@@ -437,6 +437,7 @@ static int xsk_generic_xmit(struct sock *sk)
bool sent_frame = false;
struct xdp_desc desc;
struct sk_buff *skb;
+ unsigned long flags;
int err = 0;
mutex_lock(&xs->mutex);
@@ -468,10 +469,13 @@ static int xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
kfree_skb(skb);
goto out;
}
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
skb->dev = xs->dev;
skb->priority = sk->sk_priority;
@@ -483,6 +487,9 @@ static int xsk_generic_xmit(struct sock *sk)
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */
skb->destructor = sock_wfree;
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ xskq_prod_cancel(xs->pool->cq);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
/* Free skb without triggering the perf drop trace */
consume_skb(skb);
err = -EAGAIN;
@@ -878,6 +885,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
}
+ /* FQ and CQ are now owned by the buffer pool and cleaned up with it. */
+ xs->fq_tmp = NULL;
+ xs->cq_tmp = NULL;
+
xs->dev = dev;
xs->zc = xs->umem->zc;
xs->queue_id = qid;
@@ -1299,7 +1310,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
xs->state = XSK_READY;
mutex_init(&xs->mutex);
spin_lock_init(&xs->rx_lock);
- spin_lock_init(&xs->tx_completion_lock);
INIT_LIST_HEAD(&xs->map_list);
spin_lock_init(&xs->map_list_lock);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 67a4494d63b6..20598eea658c 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -71,12 +71,11 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
spin_lock_init(&pool->xsk_tx_list_lock);
+ spin_lock_init(&pool->cq_lock);
refcount_set(&pool->users, 1);
pool->fq = xs->fq_tmp;
pool->cq = xs->cq_tmp;
- xs->fq_tmp = NULL;
- xs->cq_tmp = NULL;
for (i = 0; i < pool->free_heads_cnt; i++) {
xskb = &pool->heads[i];
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 4a9663aa7afe..2823b7c3302d 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -334,6 +334,11 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
return xskq_prod_nb_free(q, 1) ? false : true;
}
+static inline void xskq_prod_cancel(struct xsk_queue *q)
+{
+ q->cached_prod--;
+}
+
static inline int xskq_prod_reserve(struct xsk_queue *q)
{
if (xskq_prod_is_full(q))
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 8c33e999319a..c51df6b91bef 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -121,6 +121,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
/sys/kernel/btf/vmlinux \
/boot/vmlinux-$(shell uname -r)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+ifeq ($(VMLINUX_BTF),)
+$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
+endif
# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
# to build individual tests.
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 0ad3e6305ff0..51adc42b2b40 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1312,22 +1312,58 @@ static void test_map_stress(void)
#define DO_UPDATE 1
#define DO_DELETE 0
+#define MAP_RETRIES 20
+
+static int map_update_retriable(int map_fd, const void *key, const void *value,
+ int flags, int attempts)
+{
+ while (bpf_map_update_elem(map_fd, key, value, flags)) {
+ if (!attempts || (errno != EAGAIN && errno != EBUSY))
+ return -errno;
+
+ usleep(1);
+ attempts--;
+ }
+
+ return 0;
+}
+
+static int map_delete_retriable(int map_fd, const void *key, int attempts)
+{
+ while (bpf_map_delete_elem(map_fd, key)) {
+ if (!attempts || (errno != EAGAIN && errno != EBUSY))
+ return -errno;
+
+ usleep(1);
+ attempts--;
+ }
+
+ return 0;
+}
+
static void test_update_delete(unsigned int fn, void *data)
{
int do_update = ((int *)data)[1];
int fd = ((int *)data)[0];
- int i, key, value;
+ int i, key, value, err;
for (i = fn; i < MAP_SIZE; i += TASKS) {
key = value = i;
if (do_update) {
- assert(bpf_map_update_elem(fd, &key, &value,
- BPF_NOEXIST) == 0);
- assert(bpf_map_update_elem(fd, &key, &value,
- BPF_EXIST) == 0);
+ err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES);
+ if (err)
+ printf("error %d %d\n", err, errno);
+ assert(err == 0);
+ err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES);
+ if (err)
+ printf("error %d %d\n", err, errno);
+ assert(err == 0);
} else {
- assert(bpf_map_delete_elem(fd, &key) == 0);
+ err = map_delete_retriable(fd, &key, MAP_RETRIES);
+ if (err)
+ printf("error %d %d\n", err, errno);
+ assert(err == 0);
}
}
}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 014dedaa4dd2..1e722ee76b1f 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -715,7 +715,7 @@ static void worker_pkt_dump(void)
int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
if (payload == EOT) {
- ksft_print_msg("End-of-tranmission frame received\n");
+ ksft_print_msg("End-of-transmission frame received\n");
fprintf(stdout, "---------------------------------------\n");
break;
}
@@ -747,7 +747,7 @@ static void worker_pkt_validate(void)
}
if (payloadseqnum == EOT) {
- ksft_print_msg("End-of-tranmission frame received: PASS\n");
+ ksft_print_msg("End-of-transmission frame received: PASS\n");
sigvar = 1;
break;
}