summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/Kconfig 8
-rw-r--r-- net/ipv4/Makefile 3
-rw-r--r-- net/ipv4/af_inet.c 15
-rw-r--r-- net/ipv4/bpfilter/sockopt.c 36
-rw-r--r-- net/ipv4/cipso_ipv4.c 10
-rw-r--r-- net/ipv4/fib_trie.c 2
-rw-r--r-- net/ipv4/icmp.c 59
-rw-r--r-- net/ipv4/inet_connection_sock.c 61
-rw-r--r-- net/ipv4/inet_diag.c 65
-rw-r--r-- net/ipv4/inet_hashtables.c 60
-rw-r--r-- net/ipv4/ip_options.c 43
-rw-r--r-- net/ipv4/ip_output.c 6
-rw-r--r-- net/ipv4/ip_sockglue.c 601
-rw-r--r-- net/ipv4/ipmr.c 17
-rw-r--r-- net/ipv4/netfilter/arp_tables.c 107
-rw-r--r-- net/ipv4/netfilter/ip_tables.c 104
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c 2
-rw-r--r-- net/ipv4/netfilter/nf_reject_ipv4.c 21
-rw-r--r-- net/ipv4/proc.c 1
-rw-r--r-- net/ipv4/raw.c 30
-rw-r--r-- net/ipv4/tcp.c 60
-rw-r--r-- net/ipv4/tcp_highspeed.c 2
-rw-r--r-- net/ipv4/tcp_htcp.c 2
-rw-r--r-- net/ipv4/tcp_input.c 144
-rw-r--r-- net/ipv4/tcp_ipv4.c 167
-rw-r--r-- net/ipv4/tcp_output.c 14
-rw-r--r-- net/ipv4/tcp_timer.c 2
-rw-r--r-- net/ipv4/tcp_veno.c 2
-rw-r--r-- net/ipv4/udp.c 283
-rw-r--r-- net/ipv4/udp_impl.h 10
-rw-r--r-- net/ipv4/udp_tunnel_core.c (renamed from net/ipv4/udp_tunnel.c) 0
-rw-r--r-- net/ipv4/udp_tunnel_nic.c 890
-rw-r--r-- net/ipv4/udp_tunnel_stub.c 7
-rw-r--r-- net/ipv4/udplite.c 4
34 files changed, 1991 insertions, 847 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e64e59b536d3..60db5a6487cc 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -10,7 +10,7 @@ config IP_MULTICAST
intend to participate in the MBONE, a high bandwidth network on top
of the Internet which carries audio and video broadcasts. More
information about the MBONE is on the WWW at
- <http://www.savetz.com/mbone/>. For most people, it's safe to say N.
+ <https://www.savetz.com/mbone/>. For most people, it's safe to say N.
config IP_ADVANCED_ROUTER
bool "IP: advanced router"
@@ -73,7 +73,7 @@ config IP_MULTIPLE_TABLES
If you need more information, see the Linux Advanced
Routing and Traffic Control documentation at
- <http://lartc.org/howto/lartc.rpdb.html>
+ <https://lartc.org/howto/lartc.rpdb.html>
If unsure, say N.
@@ -280,7 +280,7 @@ config SYN_COOKIES
continue to connect, even when your machine is under attack. There
is no need for the legitimate users to change their TCP/IP software;
SYN cookies work transparently to them. For technical information
- about SYN cookies, check out <http://cr.yp.to/syncookies.html>.
+ about SYN cookies, check out <https://cr.yp.to/syncookies.html>.
If you are SYN flooded, the source address reported by the kernel is
likely to have been forged by the attacker; it is only reported as
@@ -525,7 +525,7 @@ config TCP_CONG_HSTCP
A modification to TCP's congestion control mechanism for use
with large congestion windows. A table indicates how much to
increase the congestion window by when an ACK is received.
- For more detail see http://www.icir.org/floyd/hstcp.html
+ For more detail see https://www.icir.org/floyd/hstcp.html
config TCP_CONG_HYBLA
tristate "TCP-Hybla congestion control algorithm"
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 9e1a186a3671..5b77a46885b9 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -14,7 +14,7 @@ obj-y := route.o inetpeer.o protocol.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
- metrics.o netlink.o nexthop.o
+ metrics.o netlink.o nexthop.o udp_tunnel_stub.o
obj-$(CONFIG_BPFILTER) += bpfilter/
@@ -29,6 +29,7 @@ gre-y := gre_demux.o
obj-$(CONFIG_NET_FOU) += fou.o
obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
obj-$(CONFIG_NET_IPGRE) += ip_gre.o
+udp_tunnel-y := udp_tunnel_core.o udp_tunnel_nic.o
obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o
obj-$(CONFIG_NET_IPVTI) += ip_vti.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 02aa5cb3a4fd..4307503a6f0b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -411,6 +411,9 @@ int inet_release(struct socket *sock)
if (sk) {
long timeout;
+ if (!sk->sk_kern_sock)
+ BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk);
+
/* Applications forget to leave groups before exiting */
ip_mc_drop_socket(sk);
@@ -1040,8 +1043,6 @@ const struct proto_ops inet_stream_ops = {
.sendpage_locked = tcp_sendpage_locked,
.peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
.set_rcvlowat = tcp_set_rcvlowat,
@@ -1070,8 +1071,6 @@ const struct proto_ops inet_dgram_ops = {
.sendpage = inet_sendpage,
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
};
@@ -1102,8 +1101,6 @@ static const struct proto_ops inet_sockraw_ops = {
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
};
@@ -1432,10 +1429,6 @@ static struct sk_buff *ipip_gso_segment(struct sk_buff *skb,
return inet_gso_segment(skb, features);
}
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *,
- struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
- struct sk_buff *));
struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
{
const struct net_offload *ops;
@@ -1608,8 +1601,6 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
return -EINVAL;
}
-INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *, int));
-INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
__be16 newlen = htons(skb->len - nhoff);
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index 0480918bfc7c..94f18d2352d0 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -12,18 +12,16 @@
struct bpfilter_umh_ops bpfilter_ops;
EXPORT_SYMBOL_GPL(bpfilter_ops);
-static void bpfilter_umh_cleanup(struct umh_info *info)
+void bpfilter_umh_cleanup(struct umd_info *info)
{
- mutex_lock(&bpfilter_ops.lock);
- bpfilter_ops.stop = true;
fput(info->pipe_to_umh);
fput(info->pipe_from_umh);
- info->pid = 0;
- mutex_unlock(&bpfilter_ops.lock);
+ put_pid(info->tgid);
+ info->tgid = NULL;
}
+EXPORT_SYMBOL_GPL(bpfilter_umh_cleanup);
-static int bpfilter_mbox_request(struct sock *sk, int optname,
- char __user *optval,
+static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval,
unsigned int optlen, bool is_set)
{
int err;
@@ -38,7 +36,11 @@ static int bpfilter_mbox_request(struct sock *sk, int optname,
goto out;
}
}
- if (bpfilter_ops.stop) {
+ if (bpfilter_ops.info.tgid &&
+ thread_group_exited(bpfilter_ops.info.tgid))
+ bpfilter_umh_cleanup(&bpfilter_ops.info);
+
+ if (!bpfilter_ops.info.tgid) {
err = bpfilter_ops.start();
if (err)
goto out;
@@ -49,29 +51,31 @@ out:
return err;
}
-int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval,
unsigned int optlen)
{
return bpfilter_mbox_request(sk, optname, optval, optlen, true);
}
-int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
- int __user *optlen)
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname,
+ char __user *user_optval, int __user *optlen)
{
- int len;
+ sockptr_t optval;
+ int err, len;
if (get_user(len, optlen))
return -EFAULT;
-
+ err = init_user_sockptr(&optval, user_optval);
+ if (err)
+ return err;
return bpfilter_mbox_request(sk, optname, optval, len, false);
}
static int __init bpfilter_sockopt_init(void)
{
mutex_init(&bpfilter_ops.lock);
- bpfilter_ops.stop = true;
- bpfilter_ops.info.cmdline = "bpfilter_umh";
- bpfilter_ops.info.cleanup = &bpfilter_umh_cleanup;
+ bpfilter_ops.info.tgid = NULL;
+ bpfilter_ops.info.driver_name = "bpfilter_umh";
return 0;
}
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index a23094b050f8..2eb71579f4d2 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -10,9 +10,9 @@
*
* The CIPSO draft specification can be found in the kernel's Documentation
* directory as well as the following URL:
- * http://tools.ietf.org/id/draft-ietf-cipso-ipsecurity-01.txt
+ * https://tools.ietf.org/id/draft-ietf-cipso-ipsecurity-01.txt
* The FIPS-188 specification can be found at the following URL:
- * http://www.itl.nist.gov/fipspubs/fip188.htm
+ * https://www.itl.nist.gov/fipspubs/fip188.htm
*
* Author: Paul Moore <paul.moore@hp.com>
*/
@@ -283,7 +283,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
/**
* cipso_v4_cache_add - Add an entry to the CIPSO cache
- * @skb: the packet
+ * @cipso_ptr: pointer to CIPSO IP option
* @secattr: the packet's security attributes
*
* Description:
@@ -1535,6 +1535,7 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
/**
* cipso_v4_validate - Validate a CIPSO option
+ * @skb: the packet
* @option: the start of the option, on error it is set to point to the error
*
* Description:
@@ -2066,7 +2067,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
/**
* cipso_v4_req_delattr - Delete the CIPSO option from a request socket
- * @reg: the request socket
+ * @req: the request socket
*
* Description:
* Removes the CIPSO option from a request socket, if present.
@@ -2158,6 +2159,7 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
/**
* cipso_v4_skbuff_setattr - Set the CIPSO option on a packet
* @skb: the packet
+ * @doi_def: the DOI structure
* @secattr: the security attributes
*
* Description:
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 248f1c1959a6..dcb0802a47d5 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -13,7 +13,7 @@
*
* An experimental study of compression methods for dynamic tries
* Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
- * http://www.csc.kth.se/~snilsson/software/dyntrie2/
+ * https://www.csc.kth.se/~snilsson/software/dyntrie2/
*
* IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
* IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e30515f89802..cf36f955bfe6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1116,6 +1116,65 @@ error:
goto drop;
}
+static bool ip_icmp_error_rfc4884_validate(const struct sk_buff *skb, int off)
+{
+ struct icmp_extobj_hdr *objh, _objh;
+ struct icmp_ext_hdr *exth, _exth;
+ u16 olen;
+
+ exth = skb_header_pointer(skb, off, sizeof(_exth), &_exth);
+ if (!exth)
+ return false;
+ if (exth->version != 2)
+ return true;
+
+ if (exth->checksum &&
+ csum_fold(skb_checksum(skb, off, skb->len - off, 0)))
+ return false;
+
+ off += sizeof(_exth);
+ while (off < skb->len) {
+ objh = skb_header_pointer(skb, off, sizeof(_objh), &_objh);
+ if (!objh)
+ return false;
+
+ olen = ntohs(objh->length);
+ if (olen < sizeof(_objh))
+ return false;
+
+ off += olen;
+ if (off > skb->len)
+ return false;
+ }
+
+ return true;
+}
+
+void ip_icmp_error_rfc4884(const struct sk_buff *skb,
+ struct sock_ee_data_rfc4884 *out,
+ int thlen, int off)
+{
+ int hlen;
+
+ /* original datagram headers: end of icmph to payload (skb->data) */
+ hlen = -skb_transport_offset(skb) - thlen;
+
+ /* per rfc 4884: minimal datagram length of 128 bytes */
+ if (off < 128 || off < hlen)
+ return;
+
+ /* kernel has stripped headers: return payload offset in bytes */
+ off -= hlen;
+ if (off + sizeof(struct icmp_ext_hdr) > skb->len)
+ return;
+
+ out->len = off;
+
+ if (!ip_icmp_error_rfc4884_validate(skb, off))
+ out->flags |= SO_EE_RFC4884_FLAG_INVALID;
+}
+EXPORT_SYMBOL_GPL(ip_icmp_error_rfc4884);
+
int icmp_err(struct sk_buff *skb, u32 info)
{
struct iphdr *iph = (struct iphdr *)skb->data;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index afaf582a5aa9..d1a3913eebe0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -648,20 +648,19 @@ no_route:
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
/* Decide when to expire the request and when to resend SYN-ACK */
-static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
- const int max_retries,
- const u8 rskq_defer_accept,
- int *expire, int *resend)
+static void syn_ack_recalc(struct request_sock *req,
+ const int max_syn_ack_retries,
+ const u8 rskq_defer_accept,
+ int *expire, int *resend)
{
if (!rskq_defer_accept) {
- *expire = req->num_timeout >= thresh;
+ *expire = req->num_timeout >= max_syn_ack_retries;
*resend = 1;
return;
}
- *expire = req->num_timeout >= thresh &&
- (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
- /*
- * Do not resend while waiting for data after ACK,
+ *expire = req->num_timeout >= max_syn_ack_retries &&
+ (!inet_rsk(req)->acked || req->num_timeout >= rskq_defer_accept);
+ /* Do not resend while waiting for data after ACK,
* start to resend on end of deferring period to give
* last chance for data or ACK to create established socket.
*/
@@ -720,15 +719,12 @@ static void reqsk_timer_handler(struct timer_list *t)
struct net *net = sock_net(sk_listener);
struct inet_connection_sock *icsk = inet_csk(sk_listener);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
- int qlen, expire = 0, resend = 0;
- int max_retries, thresh;
- u8 defer_accept;
+ int max_syn_ack_retries, qlen, expire = 0, resend = 0;
if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
goto drop;
- max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
- thresh = max_retries;
+ max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
* If synack was not acknowledged for 1 second, it means
@@ -750,17 +746,14 @@ static void reqsk_timer_handler(struct timer_list *t)
if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
int young = reqsk_queue_len_young(queue) << 1;
- while (thresh > 2) {
+ while (max_syn_ack_retries > 2) {
if (qlen < young)
break;
- thresh--;
+ max_syn_ack_retries--;
young <<= 1;
}
}
- defer_accept = READ_ONCE(queue->rskq_defer_accept);
- if (defer_accept)
- max_retries = defer_accept;
- syn_ack_recalc(req, thresh, max_retries, defer_accept,
+ syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept),
&expire, &resend);
req->rsk_ops->syn_ack_timeout(req);
if (!expire &&
@@ -1064,34 +1057,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
}
EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
-#ifdef CONFIG_COMPAT
-int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- const struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (icsk->icsk_af_ops->compat_getsockopt)
- return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname,
- optval, optlen);
- return icsk->icsk_af_ops->getsockopt(sk, level, optname,
- optval, optlen);
-}
-EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);
-
-int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- const struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (icsk->icsk_af_ops->compat_setsockopt)
- return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname,
- optval, optlen);
- return icsk->icsk_af_ops->setsockopt(sk, level, optname,
- optval, optlen);
-}
-EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
-#endif
-
static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
{
const struct inet_sock *inet = inet_sk(sk);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 125f4f8a36b4..4a98dd736270 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -52,6 +52,11 @@ static DEFINE_MUTEX(inet_diag_table_mutex);
static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
+ if (proto < 0 || proto >= IPPROTO_MAX) {
+ mutex_lock(&inet_diag_table_mutex);
+ return ERR_PTR(-ENOENT);
+ }
+
if (!inet_diag_table[proto])
sock_load_diag_module(AF_INET, proto);
@@ -181,6 +186,28 @@ errout:
}
EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);
+static void inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen,
+ struct nlattr **req_nlas)
+{
+ struct nlattr *nla;
+ int remaining;
+
+ nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) {
+ int type = nla_type(nla);
+
+ if (type < __INET_DIAG_REQ_MAX)
+ req_nlas[type] = nla;
+ }
+}
+
+static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req,
+ const struct inet_diag_dump_data *data)
+{
+ if (data->req_nlas[INET_DIAG_REQ_PROTOCOL])
+ return nla_get_u32(data->req_nlas[INET_DIAG_REQ_PROTOCOL]);
+ return req->sdiag_protocol;
+}
+
#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
@@ -198,7 +225,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
void *info = NULL;
cb_data = cb->data;
- handler = inet_diag_table[req->sdiag_protocol];
+ handler = inet_diag_table[inet_diag_get_protocol(req, cb_data)];
BUG_ON(!handler);
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
@@ -539,20 +566,25 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
+ int hdrlen,
const struct inet_diag_req_v2 *req)
{
const struct inet_diag_handler *handler;
- int err;
+ struct inet_diag_dump_data dump_data;
+ int err, protocol;
- handler = inet_diag_lock_handler(req->sdiag_protocol);
+ memset(&dump_data, 0, sizeof(dump_data));
+ inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas);
+ protocol = inet_diag_get_protocol(req, &dump_data);
+
+ handler = inet_diag_lock_handler(protocol);
if (IS_ERR(handler)) {
err = PTR_ERR(handler);
} else if (cmd == SOCK_DIAG_BY_FAMILY) {
- struct inet_diag_dump_data empty_dump_data = {};
struct netlink_callback cb = {
.nlh = nlh,
.skb = in_skb,
- .data = &empty_dump_data,
+ .data = &dump_data,
};
err = handler->dump_one(&cb, req);
} else if (cmd == SOCK_DESTROY && handler->destroy) {
@@ -1103,13 +1135,16 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r)
{
+ struct inet_diag_dump_data *cb_data = cb->data;
const struct inet_diag_handler *handler;
u32 prev_min_dump_alloc;
- int err = 0;
+ int protocol, err = 0;
+
+ protocol = inet_diag_get_protocol(r, cb_data);
again:
prev_min_dump_alloc = cb->min_dump_alloc;
- handler = inet_diag_lock_handler(r->sdiag_protocol);
+ handler = inet_diag_lock_handler(protocol);
if (!IS_ERR(handler))
handler->dump(skb, cb, r);
else
@@ -1139,19 +1174,13 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
struct inet_diag_dump_data *cb_data;
struct sk_buff *skb = cb->skb;
struct nlattr *nla;
- int rem, err;
+ int err;
cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL);
if (!cb_data)
return -ENOMEM;
- nla_for_each_attr(nla, nlmsg_attrdata(nlh, hdrlen),
- nlmsg_attrlen(nlh, hdrlen), rem) {
- int type = nla_type(nla);
-
- if (type < __INET_DIAG_REQ_MAX)
- cb_data->req_nlas[type] = nla;
- }
+ inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas);
nla = cb_data->inet_diag_nla_bc;
if (nla) {
@@ -1237,7 +1266,8 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
req.idiag_states = rc->idiag_states;
req.id = rc->id;
- return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
+ return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh,
+ sizeof(struct inet_diag_req), &req);
}
static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -1279,7 +1309,8 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
return netlink_dump_start(net->diag_nlsk, skb, h, &c);
}
- return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
+ return inet_diag_cmd_exact(h->nlmsg_type, skb, h, hdrlen,
+ nlmsg_data(h));
}
static
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2bbaaf0c7176..4eb4cd8d20dd 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -246,6 +246,21 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 phash;
+
+ if (sk->sk_reuseport) {
+ phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
+ }
+ return reuse_sk;
+}
+
/*
* Here are some nice properties to exploit here. The BSD API
* does not allow a listening sock to specify the remote port nor the
@@ -265,21 +280,17 @@ static struct sock *inet_lhash2_lookup(struct net *net,
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
- u32 phash = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
score = compute_score(sk, net, hnum, daddr,
dif, sdif, exact_dif);
if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- return result;
- }
+ result = lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ return result;
+
result = sk;
hiscore = score;
}
@@ -288,6 +299,29 @@ static struct sock *inet_lhash2_lookup(struct net *net,
return result;
}
+static inline struct sock *inet_lookup_run_bpf(struct net *net,
+ struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (hashinfo != &tcp_hashinfo)
+ return NULL; /* only TCP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -299,6 +333,14 @@ struct sock *__inet_lookup_listener(struct net *net,
struct sock *result = NULL;
unsigned int hash2;
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ goto done;
+ }
+
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ddaa01ec2bce..948747aac4e2 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -519,15 +519,20 @@ void ip_options_undo(struct ip_options *opt)
}
}
-static struct ip_options_rcu *ip_options_get_alloc(const int optlen)
+int ip_options_get(struct net *net, struct ip_options_rcu **optp,
+ sockptr_t data, int optlen)
{
- return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3),
+ struct ip_options_rcu *opt;
+
+ opt = kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3),
GFP_KERNEL);
-}
+ if (!opt)
+ return -ENOMEM;
+ if (optlen && copy_from_sockptr(opt->opt.__data, data, optlen)) {
+ kfree(opt);
+ return -EFAULT;
+ }
-static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp,
- struct ip_options_rcu *opt, int optlen)
-{
while (optlen & 3)
opt->opt.__data[optlen++] = IPOPT_END;
opt->opt.optlen = optlen;
@@ -540,32 +545,6 @@ static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp,
return 0;
}
-int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
- unsigned char __user *data, int optlen)
-{
- struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
-
- if (!opt)
- return -ENOMEM;
- if (optlen && copy_from_user(opt->opt.__data, data, optlen)) {
- kfree(opt);
- return -EFAULT;
- }
- return ip_options_get_finish(net, optp, opt, optlen);
-}
-
-int ip_options_get(struct net *net, struct ip_options_rcu **optp,
- unsigned char *data, int optlen)
-{
- struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
-
- if (!opt)
- return -ENOMEM;
- if (optlen)
- memcpy(opt->opt.__data, data, optlen);
- return ip_options_get_finish(net, optp, opt, optlen);
-}
-
void ip_forward_options(struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 17206677d503..61f802d5350c 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -539,6 +539,12 @@ no_route:
}
EXPORT_SYMBOL(__ip_queue_xmit);
+int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
+{
+ return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
+}
+EXPORT_SYMBOL(ip_queue_xmit);
+
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
to->pkt_type = from->pkt_type;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 84ec3703c909..d2c223554ff7 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -280,7 +280,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
err = cmsg->cmsg_len - sizeof(struct cmsghdr);
/* Our caller is responsible for freeing ipc->opt */
- err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
+ err = ip_options_get(net, &ipc->opt,
<