summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/net/sock.h11
-rw-r--r--include/uapi/linux/bpf.h25
-rw-r--r--net/core/filter.c31
-rw-r--r--net/core/sock.c11
-rw-r--r--net/ipv4/ip_input.c3
-rw-r--r--net/ipv6/ip6_input.c3
-rw-r--r--net/sched/act_bpf.c3
-rw-r--r--tools/include/uapi/linux/bpf.h25
8 files changed, 108 insertions, 4 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index b5cca7bae69b..dc398cee7873 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1659,6 +1659,7 @@ void sock_rfree(struct sk_buff *skb);
void sock_efree(struct sk_buff *skb);
#ifdef CONFIG_INET
void sock_edemux(struct sk_buff *skb);
+void sock_pfree(struct sk_buff *skb);
#else
#define sock_edemux sock_efree
#endif
@@ -2526,6 +2527,16 @@ void sock_net_set(struct sock *sk, struct net *net)
write_pnet(&sk->sk_net, net);
}
+static inline bool
+skb_sk_is_prefetched(struct sk_buff *skb)
+{
+#ifdef CONFIG_INET
+ return skb->destructor == sock_pfree;
+#else
+ return false;
+#endif /* CONFIG_INET */
+}
+
static inline struct sock *skb_steal_sock(struct sk_buff *skb)
{
if (skb->sk) {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f1fbc36f58d3..9f786a5a44ac 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2983,6 +2983,28 @@ union bpf_attr {
* **bpf_get_current_cgroup_id**\ ().
* Return
* The id is returned or 0 in case the id could not be retrieved.
+ *
+ * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * Description
+ * Assign the *sk* to the *skb*. When combined with appropriate
+ * routing configuration to receive the packet towards the socket,
+ * will cause *skb* to be delivered to the specified socket.
+ * Subsequent redirection of *skb* via **bpf_redirect**\ (),
+ * **bpf_clone_redirect**\ () or other methods outside of BPF may
+ * interfere with successful delivery to the socket.
+ *
+ * This operation is only valid from TC ingress path.
+ *
+ * The *flags* argument must be zero.
+ * Return
+ * 0 on success, or a negative errno in case of failure.
+ *
+ * * **-EINVAL** Unsupported flags specified.
+ * * **-ENOENT** Socket is unavailable for assignment.
+ * * **-ENETUNREACH** Socket is unreachable (wrong netns).
+ * * **-EOPNOTSUPP** Unsupported operation, for example a
+ * call from outside of TC ingress.
+ * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport).
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3108,7 +3130,8 @@ union bpf_attr {
FN(get_ns_current_pid_tgid), \
FN(xdp_output), \
FN(get_netns_cookie), \
- FN(get_current_ancestor_cgroup_id),
+ FN(get_current_ancestor_cgroup_id), \
+ FN(sk_assign),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index bb4a196c8809..ac5c1633f8d2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5918,6 +5918,35 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
+{
+ if (flags != 0)
+ return -EINVAL;
+ if (!skb_at_tc_ingress(skb))
+ return -EOPNOTSUPP;
+ if (unlikely(dev_net(skb->dev) != sock_net(sk)))
+ return -ENETUNREACH;
+ if (unlikely(sk->sk_reuseport))
+ return -ESOCKTNOSUPPORT;
+ if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
+ return -ENOENT;
+
+ skb_orphan(skb);
+ skb->sk = sk;
+ skb->destructor = sock_pfree;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_sk_assign_proto = {
+ .func = bpf_sk_assign,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_SOCK_COMMON,
+ .arg3_type = ARG_ANYTHING,
+};
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -6249,6 +6278,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skb_ecn_set_ce_proto;
case BPF_FUNC_tcp_gen_syncookie:
return &bpf_tcp_gen_syncookie_proto;
+ case BPF_FUNC_sk_assign:
+ return &bpf_sk_assign_proto;
#endif
default:
return bpf_base_func_proto(func_id);
diff --git a/net/core/sock.c b/net/core/sock.c
index 0fc8937a7ff4..87e3a03c9056 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2071,6 +2071,17 @@ void sock_efree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_efree);
+/* Buffer destructor for prefetch/receive path where reference count may
+ * not be held, e.g. for listen sockets.
+ */
+#ifdef CONFIG_INET
+void sock_pfree(struct sk_buff *skb)
+{
+ sock_gen_put(skb->sk);
+}
+EXPORT_SYMBOL(sock_pfree);
+#endif /* CONFIG_INET */
+
kuid_t sock_i_uid(struct sock *sk)
{
kuid_t uid;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index aa438c6758a7..b0c244af1e4d 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -509,7 +509,8 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
IPCB(skb)->iif = skb->skb_iif;
/* Must drop socket now because of tproxy. */
- skb_orphan(skb);
+ if (!skb_sk_is_prefetched(skb))
+ skb_orphan(skb);
return skb;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 7b089d0ac8cd..e96304d8a4a7 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -285,7 +285,8 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
rcu_read_unlock();
/* Must drop socket now because of tproxy. */
- skb_orphan(skb);
+ if (!skb_sk_is_prefetched(skb))
+ skb_orphan(skb);
return skb;
err:
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 46f47e58b3be..54d5652cfe6c 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -12,6 +12,7 @@
#include <linux/bpf.h>
#include <net/netlink.h>
+#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -53,6 +54,8 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(filter, skb);
}
+ if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
+ skb_orphan(skb);
rcu_read_unlock();
/* A BPF program may overwrite the default action opcode.
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f1fbc36f58d3..9f786a5a44ac 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2983,6 +2983,28 @@ union bpf_attr {
* **bpf_get_current_cgroup_id**\ ().
* Return
* The id is returned or 0 in case the id could not be retrieved.
+ *
+ * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * Description
+ * Assign the *sk* to the *skb*. When combined with appropriate
+ * routing configuration to receive the packet towards the socket,
+ * will cause *skb* to be delivered to the specified socket.
+ * Subsequent redirection of *skb* via **bpf_redirect**\ (),
+ * **bpf_clone_redirect**\ () or other methods outside of BPF may
+ * interfere with successful delivery to the socket.
+ *
+ * This operation is only valid from TC ingress path.
+ *
+ * The *flags* argument must be zero.
+ * Return
+ * 0 on success, or a negative errno in case of failure.
+ *
+ * * **-EINVAL** Unsupported flags specified.
+ * * **-ENOENT** Socket is unavailable for assignment.
+ * * **-ENETUNREACH** Socket is unreachable (wrong netns).
+ * * **-EOPNOTSUPP** Unsupported operation, for example a
+ * call from outside of TC ingress.
+ * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport).
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3108,7 +3130,8 @@ union bpf_attr {
FN(get_ns_current_pid_tgid), \
FN(xdp_output), \
FN(get_netns_cookie), \
- FN(get_current_ancestor_cgroup_id),
+ FN(get_current_ancestor_cgroup_id), \
+ FN(sk_assign),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call