summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2020-06-01 14:57:15 -0700
committerAlexei Starovoitov <ast@kernel.org>2020-06-01 15:00:20 -0700
commitc48a24f00e7a8b90c817e3f7aa800a3150bb83d0 (patch)
treebba2031049e20cce4e5ff3d21f6749c6b9e625b5
parentbb2359f4dbe98e8863b4e885fc09269ef4682ec3 (diff)
parent9c441fe4c06a553ad770b6f21616327a3badf793 (diff)
Merge branch 'bpf_setsockopt-SO_BINDTODEVICE'
Ferenc Fejes says: ==================== This option makes it possible to programatically bind sockets to netdevices. With the help of this option sockets of VRF unaware applications could be distributed between multiple VRFs with an eBPF program. This lets the applications benefit from multiple possible routes. v2: - splitting up the patch to three parts - lock_sk parameter for optional locking in sock_bindtoindex - Stanislav Fomichev - testing the SO_BINDTODEVICE option - Andrii Nakryiko ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--include/net/sock.h2
-rw-r--r--net/core/filter.c28
-rw-r--r--net/core/sock.c10
-rw-r--r--net/ipv4/udp_tunnel.c2
-rw-r--r--net/ipv6/ip6_udp_tunnel.c2
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c33
6 files changed, 69 insertions, 8 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 6e9f713a7860..c53cc42b5ab9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2690,7 +2690,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
void sock_def_readable(struct sock *sk);
-int sock_bindtoindex(struct sock *sk, int ifindex);
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk);
void sock_enable_timestamps(struct sock *sk);
void sock_no_linger(struct sock *sk);
void sock_set_keepalive(struct sock *sk);
diff --git a/net/core/filter.c b/net/core/filter.c
index 85ff827aab73..ae82bcb03124 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4248,6 +4248,9 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
static int _bpf_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen, u32 flags)
{
+ char devname[IFNAMSIZ];
+ struct net *net;
+ int ifindex;
int ret = 0;
int val;
@@ -4257,7 +4260,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
sock_owned_by_me(sk);
if (level == SOL_SOCKET) {
- if (optlen != sizeof(int))
+ if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
return -EINVAL;
val = *((int *)optval);
@@ -4298,6 +4301,29 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
sk_dst_reset(sk);
}
break;
+ case SO_BINDTODEVICE:
+ ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+ optlen = min_t(long, optlen, IFNAMSIZ - 1);
+ strncpy(devname, optval, optlen);
+ devname[optlen] = 0;
+
+ ifindex = 0;
+ if (devname[0] != '\0') {
+ struct net_device *dev;
+
+ ret = -ENODEV;
+
+ net = sock_net(sk);
+ dev = dev_get_by_name(net, devname);
+ if (!dev)
+ break;
+ ifindex = dev->ifindex;
+ dev_put(dev);
+ }
+ ret = sock_bindtoindex(sk, ifindex, false);
+#endif
+ break;
default:
ret = -EINVAL;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 61ec573221a6..6c4acf1f0220 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -594,13 +594,15 @@ out:
return ret;
}
-int sock_bindtoindex(struct sock *sk, int ifindex)
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
{
int ret;
- lock_sock(sk);
+ if (lock_sk)
+ lock_sock(sk);
ret = sock_bindtoindex_locked(sk, ifindex);
- release_sock(sk);
+ if (lock_sk)
+ release_sock(sk);
return ret;
}
@@ -646,7 +648,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
goto out;
}
- return sock_bindtoindex(sk, index);
+ return sock_bindtoindex(sk, index, true);
out:
#endif
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 2158e8bddf41..3eecba0874aa 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -22,7 +22,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
goto error;
if (cfg->bind_ifindex) {
- err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
+ err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true);
if (err < 0)
goto error;
}
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 2e0ad1bc84a8..cdc4d4ee2420 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -30,7 +30,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
goto error;
}
if (cfg->bind_ifindex) {
- err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
+ err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true);
if (err < 0)
goto error;
}
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index c2c85c31cffd..1ab2c5eba86c 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -9,6 +9,8 @@
#include <linux/in6.h>
#include <sys/socket.h>
#include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
@@ -21,6 +23,10 @@
#define TCP_CA_NAME_MAX 16
#endif
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
int _version SEC("version") = 1;
__attribute__ ((noinline))
@@ -75,6 +81,29 @@ static __inline int set_cc(struct bpf_sock_addr *ctx)
return 0;
}
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+ char veth1[IFNAMSIZ] = "test_sock_addr1";
+ char veth2[IFNAMSIZ] = "test_sock_addr2";
+ char missing[IFNAMSIZ] = "nonexistent_dev";
+ char del_bind[IFNAMSIZ] = "";
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth1, sizeof(veth1)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth2, sizeof(veth2)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &missing, sizeof(missing)) != -ENODEV)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &del_bind, sizeof(del_bind)))
+ return 1;
+
+ return 0;
+}
+
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
@@ -88,6 +117,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
+ /* Bind to device and unbind it. */
+ if (bind_to_device(ctx))
+ return 0;
+
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
return 0;
else if (ctx->type == SOCK_STREAM)