summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2019-05-04 23:26:37 -0700
committerAlexei Starovoitov <ast@kernel.org>2019-05-04 23:26:38 -0700
commitec1c8fa04ffc1f90fdf101cd734ee7d2517883be (patch)
tree578931a064200ac50725b386d33f451a00c60d5b
parent6cea33701eb024bc6c920ab83940ee22afd29139 (diff)
parent5750902a6e9bc6adb77da8257c0e34db2bfdebb2 (diff)
Merge branch 'af_xdp-fixes'
Björn Töpel says: ==================== William found two bugs, when doing socket teardown within the same process. The first issue was an invalid munmap call, and the second one was an invalid XSKMAP cleanup. Both resulted in that the process kept references to the socket, which was not correctly cleaned up. When a new socket was created, the bind() call would fail, since the old socket was still lingering, refusing to give up the queue on the netdev. More details can be found in the individual commits. Thanks, Björn ==================== Reviewed-by: Jonathan Lemon <jonathan.lemon@gmail.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--tools/lib/bpf/xsk.c192
1 files changed, 100 insertions, 92 deletions
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 557ef8d1250d..525f1cc163b5 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -248,8 +248,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
return 0;
out_mmap:
- munmap(umem->fill,
- off.fr.desc + umem->config.fill_size * sizeof(__u64));
+ munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
out_socket:
close(umem->fd);
out_umem_alloc:
@@ -388,21 +387,17 @@ static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{
close(xsk->qidconf_map_fd);
close(xsk->xsks_map_fd);
+ xsk->qidconf_map_fd = -1;
+ xsk->xsks_map_fd = -1;
}
-static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
- int xsks_value)
+static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
{
- bool qidconf_map_updated = false, xsks_map_updated = false;
+ __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
+ __u32 map_len = sizeof(struct bpf_map_info);
struct bpf_prog_info prog_info = {};
- __u32 prog_len = sizeof(prog_info);
struct bpf_map_info map_info;
- __u32 map_len = sizeof(map_info);
- __u32 *map_ids;
- int reset_value = 0;
- __u32 num_maps;
- unsigned int i;
- int err;
+ int fd, err;
err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
if (err)
@@ -423,66 +418,71 @@ static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
goto out_map_ids;
for (i = 0; i < prog_info.nr_map_ids; i++) {
- int fd;
+ if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1)
+ break;
fd = bpf_map_get_fd_by_id(map_ids[i]);
- if (fd < 0) {
- err = -errno;
- goto out_maps;
- }
+ if (fd < 0)
+ continue;
err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
- if (err)
- goto out_maps;
+ if (err) {
+ close(fd);
+ continue;
+ }
if (!strcmp(map_info.name, "qidconf_map")) {
- err = bpf_map_update_elem(fd, &xsk->queue_id,
- &qidconf_value, 0);
- if (err)
- goto out_maps;
- qidconf_map_updated = true;
xsk->qidconf_map_fd = fd;
- } else if (!strcmp(map_info.name, "xsks_map")) {
- err = bpf_map_update_elem(fd, &xsk->queue_id,
- &xsks_value, 0);
- if (err)
- goto out_maps;
- xsks_map_updated = true;
+ continue;
+ }
+
+ if (!strcmp(map_info.name, "xsks_map")) {
xsk->xsks_map_fd = fd;
+ continue;
}
- if (qidconf_map_updated && xsks_map_updated)
- break;
+ close(fd);
}
- if (!(qidconf_map_updated && xsks_map_updated)) {
+ err = 0;
+ if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
err = -ENOENT;
- goto out_maps;
+ xsk_delete_bpf_maps(xsk);
}
- err = 0;
- goto out_success;
-
-out_maps:
- if (qidconf_map_updated)
- (void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id,
- &reset_value, 0);
- if (xsks_map_updated)
- (void)bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
- &reset_value, 0);
-out_success:
- if (qidconf_map_updated)
- close(xsk->qidconf_map_fd);
- if (xsks_map_updated)
- close(xsk->xsks_map_fd);
out_map_ids:
free(map_ids);
return err;
}
+static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
+{
+ int qid = false;
+
+ (void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
+ (void)bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
+}
+
+static int xsk_set_bpf_maps(struct xsk_socket *xsk)
+{
+ int qid = true, fd = xsk->fd, err;
+
+ err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
+ if (err)
+ goto out;
+
+ err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0);
+ if (err)
+ goto out;
+
+ return 0;
+out:
+ xsk_clear_bpf_maps(xsk);
+ return err;
+}
+
static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
{
- bool prog_attached = false;
__u32 prog_id = 0;
int err;
@@ -492,7 +492,6 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
return err;
if (!prog_id) {
- prog_attached = true;
err = xsk_create_bpf_maps(xsk);
if (err)
return err;
@@ -502,20 +501,21 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
goto out_maps;
} else {
xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ err = xsk_lookup_bpf_maps(xsk);
+ if (err)
+ goto out_load;
}
- err = xsk_update_bpf_maps(xsk, true, xsk->fd);
+ err = xsk_set_bpf_maps(xsk);
if (err)
goto out_load;
return 0;
out_load:
- if (prog_attached)
- close(xsk->prog_fd);
+ close(xsk->prog_fd);
out_maps:
- if (prog_attached)
- xsk_delete_bpf_maps(xsk);
+ xsk_delete_bpf_maps(xsk);
return err;
}
@@ -524,11 +524,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
const struct xsk_socket_config *usr_config)
{
+ void *rx_map = NULL, *tx_map = NULL;
struct sockaddr_xdp sxdp = {};
struct xdp_mmap_offsets off;
struct xsk_socket *xsk;
socklen_t optlen;
- void *map;
int err;
if (!umem || !xsk_ptr || !rx || !tx)
@@ -594,40 +594,40 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
}
if (rx) {
- map = xsk_mmap(NULL, off.rx.desc +
- xsk->config.rx_size * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE,
- xsk->fd, XDP_PGOFF_RX_RING);
- if (map == MAP_FAILED) {
+ rx_map = xsk_mmap(NULL, off.rx.desc +
+ xsk->config.rx_size * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE,
+ xsk->fd, XDP_PGOFF_RX_RING);
+ if (rx_map == MAP_FAILED) {
err = -errno;
goto out_socket;
}
rx->mask = xsk->config.rx_size - 1;
rx->size = xsk->config.rx_size;
- rx->producer = map + off.rx.producer;
- rx->consumer = map + off.rx.consumer;
- rx->ring = map + off.rx.desc;
+ rx->producer = rx_map + off.rx.producer;
+ rx->consumer = rx_map + off.rx.consumer;
+ rx->ring = rx_map + off.rx.desc;
}
xsk->rx = rx;
if (tx) {
- map = xsk_mmap(NULL, off.tx.desc +
- xsk->config.tx_size * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE,
- xsk->fd, XDP_PGOFF_TX_RING);
- if (map == MAP_FAILED) {
+ tx_map = xsk_mmap(NULL, off.tx.desc +
+ xsk->config.tx_size * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE,
+ xsk->fd, XDP_PGOFF_TX_RING);
+ if (tx_map == MAP_FAILED) {
err = -errno;
goto out_mmap_rx;
}
tx->mask = xsk->config.tx_size - 1;
tx->size = xsk->config.tx_size;
- tx->producer = map + off.tx.producer;
- tx->consumer = map + off.tx.consumer;
- tx->ring = map + off.tx.desc;
+ tx->producer = tx_map + off.tx.producer;
+ tx->consumer = tx_map + off.tx.consumer;
+ tx->ring = tx_map + off.tx.desc;
tx->cached_cons = xsk->config.tx_size;
}
xsk->tx = tx;
@@ -643,6 +643,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
goto out_mmap_tx;
}
+ xsk->qidconf_map_fd = -1;
+ xsk->xsks_map_fd = -1;
+
if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
err = xsk_setup_xdp_prog(xsk);
if (err)
@@ -654,13 +657,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
out_mmap_tx:
if (tx)
- munmap(xsk->tx,
- off.tx.desc +
+ munmap(tx_map, off.tx.desc +
xsk->config.tx_size * sizeof(struct xdp_desc));
out_mmap_rx:
if (rx)
- munmap(xsk->rx,
- off.rx.desc +
+ munmap(rx_map, off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc));
out_socket:
if (--umem->refcount)
@@ -685,10 +686,12 @@ int xsk_umem__delete(struct xsk_umem *umem)
optlen = sizeof(off);
err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
if (!err) {
- munmap(umem->fill->ring,
- off.fr.desc + umem->config.fill_size * sizeof(__u64));
- munmap(umem->comp->ring,
- off.cr.desc + umem->config.comp_size * sizeof(__u64));
+ (void)munmap(umem->fill->ring - off.fr.desc,
+ off.fr.desc +
+ umem->config.fill_size * sizeof(__u64));
+ (void)munmap(umem->comp->ring - off.cr.desc,
+ off.cr.desc +
+ umem->config.comp_size * sizeof(__u64));
}
close(umem->fd);
@@ -699,6 +702,7 @@ int xsk_umem__delete(struct xsk_umem *umem)
void xsk_socket__delete(struct xsk_socket *xsk)
{
+ size_t desc_sz = sizeof(struct xdp_desc);
struct xdp_mmap_offsets off;
socklen_t optlen;
int err;
@@ -706,19 +710,23 @@ void xsk_socket__delete(struct xsk_socket *xsk)
if (!xsk)
return;
- (void)xsk_update_bpf_maps(xsk, 0, 0);
+ xsk_clear_bpf_maps(xsk);
+ xsk_delete_bpf_maps(xsk);
optlen = sizeof(off);
err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
if (!err) {
- if (xsk->rx)
- munmap(xsk->rx->ring,
- off.rx.desc +
- xsk->config.rx_size * sizeof(struct xdp_desc));
- if (xsk->tx)
- munmap(xsk->tx->ring,
- off.tx.desc +
- xsk->config.tx_size * sizeof(struct xdp_desc));
+ if (xsk->rx) {
+ (void)munmap(xsk->rx->ring - off.rx.desc,
+ off.rx.desc +
+ xsk->config.rx_size * desc_sz);
+ }
+ if (xsk->tx) {
+ (void)munmap(xsk->tx->ring - off.tx.desc,
+ off.tx.desc +
+ xsk->config.tx_size * desc_sz);
+ }
+
}
xsk->umem->refcount--;