summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-03-31 17:29:33 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-03-31 17:29:33 -0700
commit29d9f30d4ce6c7a38745a54a8cddface10013490 (patch)
tree85649ba6a7b39203584d8db9365e03f64e62c136 /net/ipv4
parent56a451b780676bc1cdac011735fe2869fa2e9abf (diff)
parent7f80ccfe996871ca69648efee74a60ae7ad0dcd9 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller: "Highlights: 1) Fix the iwlwifi regression, from Johannes Berg. 2) Support BSS coloring and 802.11 encapsulation offloading in hardware, from John Crispin. 3) Fix some potential Spectre issues in qtnfmac, from Sergey Matyukevich. 4) Add TTL decrement action to openvswitch, from Matteo Croce. 5) Allow paralleization through flow_action setup by not taking the RTNL mutex, from Vlad Buslov. 6) A lot of zero-length array to flexible-array conversions, from Gustavo A. R. Silva. 7) Align XDP statistics names across several drivers for consistency, from Lorenzo Bianconi. 8) Add various pieces of infrastructure for offloading conntrack, and make use of it in mlx5 driver, from Paul Blakey. 9) Allow using listening sockets in BPF sockmap, from Jakub Sitnicki. 10) Lots of parallelization improvements during configuration changes in mlxsw driver, from Ido Schimmel. 11) Add support to devlink for generic packet traps, which report packets dropped during ACL processing. And use them in mlxsw driver. From Jiri Pirko. 12) Support bcmgenet on ACPI, from Jeremy Linton. 13) Make BPF compatible with RT, from Thomas Gleixnet, Alexei Starovoitov, and your's truly. 14) Support XDP meta-data in virtio_net, from Yuya Kusakabe. 15) Fix sysfs permissions when network devices change namespaces, from Christian Brauner. 16) Add a flags element to ethtool_ops so that drivers can more simply indicate which coalescing parameters they actually support, and therefore the generic layer can validate the user's ethtool request. Use this in all drivers, from Jakub Kicinski. 17) Offload FIFO qdisc in mlxsw, from Petr Machata. 18) Support UDP sockets in sockmap, from Lorenz Bauer. 19) Fix stretch ACK bugs in several TCP congestion control modules, from Pengcheng Yang. 20) Support virtual functiosn in octeontx2 driver, from Tomasz Duszynski. 21) Add region operations for devlink and use it in ice driver to dump NVM contents, from Jacob Keller. 22) Add support for hw offload of MACSEC, from Antoine Tenart. 23) Add support for BPF programs that can be attached to LSM hooks, from KP Singh. 24) Support for multiple paths, path managers, and counters in MPTCP. From Peter Krystad, Paolo Abeni, Florian Westphal, Davide Caratti, and others. 25) More progress on adding the netlink interface to ethtool, from Michal Kubecek" * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2121 commits) net: ipv6: rpl_iptunnel: Fix potential memory leak in rpl_do_srh_inline cxgb4/chcr: nic-tls stats in ethtool net: dsa: fix oops while probing Marvell DSA switches net/bpfilter: remove superfluous testing message net: macb: Fix handling of fixed-link node net: dsa: ksz: Select KSZ protocol tag netdevsim: dev: Fix memory leak in nsim_dev_take_snapshot_write net: stmmac: add EHL 2.5Gbps PCI info and PCI ID net: stmmac: add EHL PSE0 & PSE1 1Gbps PCI info and PCI ID net: stmmac: create dwmac-intel.c to contain all Intel platform net: dsa: bcm_sf2: Support specifying VLAN tag egress rule net: dsa: bcm_sf2: Add support for matching VLAN TCI net: dsa: bcm_sf2: Move writing of CFP_DATA(5) into slicing functions net: dsa: bcm_sf2: Check earlier for FLOW_EXT and FLOW_MAC_EXT net: dsa: bcm_sf2: Disable learning for ASP port net: dsa: b53: Deny enslaving port 7 for 7278 into a bridge net: dsa: b53: Prevent tagged VLAN on port 7 for 7278 net: dsa: b53: Restore VLAN entries upon (re)configuration net: dsa: bcm_sf2: Fix overflow checks hv_netvsc: Remove unnecessary round_up for recv_completion_cnt ...
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/ah4.c2
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/bpf_tcp_ca.c33
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/esp4.c16
-rw-r--r--net/ipv4/esp4_offload.c32
-rw-r--r--net/ipv4/fib_lookup.h2
-rw-r--r--net/ipv4/fib_semantics.c26
-rw-r--r--net/ipv4/fib_trie.c10
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/inet_connection_sock.c36
-rw-r--r--net/ipv4/inet_diag.c307
-rw-r--r--net/ipv4/ip_input.c3
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/ip_tunnel.c6
-rw-r--r--net/ipv4/ip_tunnel_core.c4
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c4
-rw-r--r--net/ipv4/netfilter/nf_log_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c4
-rw-r--r--net/ipv4/nexthop.c2
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/raw_diag.c24
-rw-r--r--net/ipv4/route.c61
-rw-r--r--net/ipv4/sysctl_net_ipv4.c33
-rw-r--r--net/ipv4/tcp.c29
-rw-r--r--net/ipv4/tcp_bic.c11
-rw-r--r--net/ipv4/tcp_bpf.c272
-rw-r--r--net/ipv4/tcp_diag.c8
-rw-r--r--net/ipv4/tcp_input.c6
-rw-r--r--net/ipv4/tcp_ipv4.c10
-rw-r--r--net/ipv4/tcp_minisocks.c9
-rw-r--r--net/ipv4/tcp_scalable.c17
-rw-r--r--net/ipv4/tcp_ulp.c9
-rw-r--r--net/ipv4/tcp_veno.c47
-rw-r--r--net/ipv4/tcp_yeah.c41
-rw-r--r--net/ipv4/udp.c24
-rw-r--r--net/ipv4/udp_bpf.c53
-rw-r--r--net/ipv4/udp_diag.c41
-rw-r--r--net/ipv4/udp_offload.c1
45 files changed, 705 insertions, 515 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 9d97bace13c8..9e1a186a3671 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
+obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 2fe295432c24..cf58e29cf746 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -872,7 +872,7 @@ int inet_shutdown(struct socket *sock, int how)
err = -ENOTCONN;
/* Hack to wake up other listeners, who can poll for
EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
- /* fall through */
+ fallthrough;
default:
sk->sk_shutdown |= how;
if (sk->sk_prot->shutdown)
@@ -886,7 +886,7 @@ int inet_shutdown(struct socket *sock, int how)
case TCP_LISTEN:
if (!(how & RCV_SHUTDOWN))
break;
- /* fall through */
+ fallthrough;
case TCP_SYN_SENT:
err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
@@ -1793,6 +1793,10 @@ static __net_exit void ipv4_mib_exit_net(struct net *net)
free_percpu(net->mib.net_statistics);
free_percpu(net->mib.ip_statistics);
free_percpu(net->mib.tcp_statistics);
+#ifdef CONFIG_MPTCP
+ /* allocated on demand, see mptcp_init_sock() */
+ free_percpu(net->mib.mptcp_statistics);
+#endif
}
static __net_initdata struct pernet_operations ipv4_mib_ops = {
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 974179b3b314..d99e1be94019 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -107,7 +107,7 @@ static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr)
if (optlen < 6)
return -EINVAL;
memcpy(daddr, optptr+optlen-4, 4);
- /* Fall through */
+ fallthrough;
default:
memset(optptr, 0, optlen);
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 05eb42f347e8..687971d83b4e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1181,7 +1181,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSARP:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- /* fall through */
+ fallthrough;
case SIOCGARP:
err = copy_from_user(&r, arg, sizeof(struct arpreq));
if (err)
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 2bf3abeb1456..e3939f76b024 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -7,6 +7,7 @@
#include <linux/btf.h>
#include <linux/filter.h>
#include <net/tcp.h>
+#include <net/bpf_sk_storage.h>
static u32 optional_ops[] = {
offsetof(struct tcp_congestion_ops, init),
@@ -27,6 +28,27 @@ static u32 unsupported_ops[] = {
static const struct btf_type *tcp_sock_type;
static u32 tcp_sock_id, sock_id;
+static int btf_sk_storage_get_ids[5];
+static struct bpf_func_proto btf_sk_storage_get_proto __read_mostly;
+
+static int btf_sk_storage_delete_ids[5];
+static struct bpf_func_proto btf_sk_storage_delete_proto __read_mostly;
+
+static void convert_sk_func_proto(struct bpf_func_proto *to, int *to_btf_ids,
+ const struct bpf_func_proto *from)
+{
+ int i;
+
+ *to = *from;
+ to->btf_id = to_btf_ids;
+ for (i = 0; i < ARRAY_SIZE(to->arg_type); i++) {
+ if (to->arg_type[i] == ARG_PTR_TO_SOCKET) {
+ to->arg_type[i] = ARG_PTR_TO_BTF_ID;
+ to->btf_id[i] = tcp_sock_id;
+ }
+ }
+}
+
static int bpf_tcp_ca_init(struct btf *btf)
{
s32 type_id;
@@ -42,6 +64,13 @@ static int bpf_tcp_ca_init(struct btf *btf)
tcp_sock_id = type_id;
tcp_sock_type = btf_type_by_id(btf, tcp_sock_id);
+ convert_sk_func_proto(&btf_sk_storage_get_proto,
+ btf_sk_storage_get_ids,
+ &bpf_sk_storage_get_proto);
+ convert_sk_func_proto(&btf_sk_storage_delete_proto,
+ btf_sk_storage_delete_ids,
+ &bpf_sk_storage_delete_proto);
+
return 0;
}
@@ -167,6 +196,10 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
switch (func_id) {
case BPF_FUNC_tcp_send_ack:
return &bpf_tcp_send_ack_proto;
+ case BPF_FUNC_sk_storage_get:
+ return &btf_sk_storage_get_proto;
+ case BPF_FUNC_sk_storage_delete:
+ return &btf_sk_storage_delete_proto;
default:
return bpf_base_func_proto(func_id);
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e4632bd2026d..30fa42f5997d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1566,11 +1566,11 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
}
}
ip_mc_up(in_dev);
- /* fall through */
+ fallthrough;
case NETDEV_CHANGEADDR:
if (!IN_DEV_ARP_NOTIFY(in_dev))
break;
- /* fall through */
+ fallthrough;
case NETDEV_NOTIFY_PEERS:
/* Send gratuitous ARP to notify of link change */
inetdev_send_gratuitous_arp(dev, in_dev);
@@ -1588,7 +1588,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
if (inetdev_valid_mtu(dev->mtu))
break;
/* disable IP when MTU is not enough */
- /* fall through */
+ fallthrough;
case NETDEV_UNREGISTER:
inetdev_destroy(in_dev);
break;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 103c7d599a3c..8b07f3a4f2db 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -341,22 +341,6 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
esp_output_done(base, err);
}
-static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
-{
- /* Fill padding... */
- if (tfclen) {
- memset(tail, 0, tfclen);
- tail += tfclen;
- }
- do {
- int i;
- for (i = 0; i < plen - 2; i++)
- tail[i] = i + 1;
- } while (0);
- tail[plen - 2] = plen - 2;
- tail[plen - 1] = proto;
-}
-
static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
int encap_type,
struct esp_info *esp,
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index e2e219c7854a..731022cff600 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -132,6 +132,36 @@ static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
return segs;
}
+static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ const struct net_offload *ops;
+ int proto = xo->proto;
+
+ skb->transport_header += x->props.header_len;
+
+ if (proto == IPPROTO_BEETPH) {
+ struct ip_beet_phdr *ph = (struct ip_beet_phdr *)skb->data;
+
+ skb->transport_header += ph->hdrlen * 8;
+ proto = ph->nexthdr;
+ } else if (x->sel.family != AF_INET6) {
+ skb->transport_header -= IPV4_BEET_PHMAXLEN;
+ } else if (proto == IPPROTO_TCP) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+ }
+
+ __skb_pull(skb, skb_transport_offset(skb));
+ ops = rcu_dereference(inet_offloads[proto]);
+ if (likely(ops && ops->callbacks.gso_segment))
+ segs = ops->callbacks.gso_segment(skb, features);
+
+ return segs;
+}
+
static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x,
struct sk_buff *skb,
netdev_features_t features)
@@ -141,6 +171,8 @@ static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x,
return xfrm4_tunnel_gso_segment(x, skb, features);
case XFRM_MODE_TRANSPORT:
return xfrm4_transport_gso_segment(x, skb, features);
+ case XFRM_MODE_BEET:
+ return xfrm4_beet_gso_segment(x, skb, features);
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index c092e9a55790..818916b2a04d 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -35,7 +35,7 @@ static inline void fib_alias_accessed(struct fib_alias *fa)
void fib_release_info(struct fib_info *);
struct fib_info *fib_create_info(struct fib_config *cfg,
struct netlink_ext_ack *extack);
-int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
+int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
struct netlink_ext_ack *extack);
bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi);
int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a803cdd9400a..6ed8c9317179 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -570,8 +570,9 @@ static int fib_detect_death(struct fib_info *fi, int order,
return 1;
}
-int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap,
- u16 encap_type, void *cfg, gfp_t gfp_flags,
+int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc,
+ struct nlattr *encap, u16 encap_type,
+ void *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack)
{
int err;
@@ -589,8 +590,9 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap,
err = -EINVAL;
goto lwt_failure;
}
- err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family,
- cfg, &lwtstate, extack);
+ err = lwtunnel_build_state(net, encap_type, encap,
+ nhc->nhc_family, cfg, &lwtstate,
+ extack);
if (err)
goto lwt_failure;
@@ -614,7 +616,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
nh->fib_nh_family = AF_INET;
- err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap,
+ err = fib_nh_common_init(net, &nh->nh_common, cfg->fc_encap,
cfg->fc_encap_type, cfg, GFP_KERNEL, extack);
if (err)
return err;
@@ -814,7 +816,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
-static int fib_encap_match(u16 encap_type,
+static int fib_encap_match(struct net *net, u16 encap_type,
struct nlattr *encap,
const struct fib_nh *nh,
const struct fib_config *cfg,
@@ -826,7 +828,7 @@ static int fib_encap_match(u16 encap_type,
if (encap_type == LWTUNNEL_ENCAP_NONE)
return 0;
- ret = lwtunnel_build_state(encap_type, encap, AF_INET,
+ ret = lwtunnel_build_state(net, encap_type, encap, AF_INET,
cfg, &lwtstate, extack);
if (!ret) {
result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws);
@@ -836,7 +838,7 @@ static int fib_encap_match(u16 encap_type,
return result;
}
-int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
+int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
struct netlink_ext_ack *extack)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -857,8 +859,8 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
struct fib_nh *nh = fib_info_nh(fi, 0);
if (cfg->fc_encap) {
- if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
- nh, cfg, extack))
+ if (fib_encap_match(net, cfg->fc_encap_type,
+ cfg->fc_encap, nh, cfg, extack))
return 1;
}
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -1962,7 +1964,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
nexthop_nh->fib_nh_flags |= RTNH_F_DEAD;
- /* fall through */
+ fallthrough;
case NETDEV_CHANGE:
nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
break;
@@ -1984,7 +1986,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
fi->fib_flags |= RTNH_F_DEAD;
- /* fall through */
+ fallthrough;
case NETDEV_CHANGE:
fi->fib_flags |= RTNH_F_LINKDOWN;
break;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ff0c24371e33..4f334b425538 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -35,9 +35,6 @@
* Paul E. McKenney <paulmck@us.ibm.com>
* Patrick McHardy <kaber@trash.net>
*/
-
-#define VERSION "0.409"
-
#include <linux/cache.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
@@ -304,8 +301,6 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa)
call_rcu(&fa->rcu, __alias_free_mem);
}
-#define TNODE_KMALLOC_MAX \
- ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *))
#define TNODE_VMALLOC_MAX \
ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *))
@@ -1684,7 +1679,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
fi->fib_prefsrc == cfg->fc_prefsrc) &&
(!cfg->fc_protocol ||
fi->fib_protocol == cfg->fc_protocol) &&
- fib_nh_match(cfg, fi, extack) == 0 &&
+ fib_nh_match(net, cfg, fi, extack) == 0 &&
fib_metrics_match(cfg, fi)) {
fa_to_delete = fa;
break;
@@ -2577,6 +2572,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
" %zd bytes, size of tnode: %zd bytes.\n",
LEAF_SIZE, TNODE_SIZE(0));
+ rcu_read_lock();
for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
struct hlist_head *head = &net->ipv4.fib_table_hash[h];
struct fib_table *tb;
@@ -2596,7 +2592,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
trie_show_usage(seq, t->stats);
#endif
}
+ cond_resched_rcu();
}
+ rcu_read_unlock();
return 0;
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index f369e7ce685b..fc61f51d87a3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -865,7 +865,7 @@ static bool icmp_unreach(struct sk_buff *skb)
case 3:
if (!icmp_tag_validation(iph->protocol))
goto out;
- /* fall through */
+ fallthrough;
case 0:
info = ntohs(icmph->un.frag.mtu);
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 3b9c7a2725a9..47f0502b2101 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -107,8 +107,6 @@
#ifdef CONFIG_IP_MULTICAST
/* Parameter names and values are taken from igmp-v2-06 draft */
-#define IGMP_V2_UNSOLICITED_REPORT_INTERVAL (10*HZ)
-#define IGMP_V3_UNSOLICITED_REPORT_INTERVAL (1*HZ)
#define IGMP_QUERY_INTERVAL (125*HZ)
#define IGMP_QUERY_RESPONSE_INTERVAL (10*HZ)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d545fb99a8a1..5f34eb951627 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -131,7 +131,7 @@ static int inet_csk_bind_conflict(const struct sock *sk,
{
struct sock *sk2;
bool reuse = sk->sk_reuse;
- bool reuseport = !!sk->sk_reuseport && reuseport_ok;
+ bool reuseport = !!sk->sk_reuseport;
kuid_t uid = sock_i_uid((struct sock *)sk);
/*
@@ -146,17 +146,21 @@ static int inet_csk_bind_conflict(const struct sock *sk,
(!sk->sk_bound_dev_if ||
!sk2->sk_bound_dev_if ||
sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
- if ((!reuse || !sk2->sk_reuse ||
- sk2->sk_state == TCP_LISTEN) &&
- (!reuseport || !sk2->sk_reuseport ||
- rcu_access_pointer(sk->sk_reuseport_cb) ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(uid, sock_i_uid(sk2))))) {
- if (inet_rcv_saddr_equal(sk, sk2, true))
- break;
- }
- if (!relax && reuse && sk2->sk_reuse &&
+ if (reuse && sk2->sk_reuse &&
sk2->sk_state != TCP_LISTEN) {
+ if ((!relax ||
+ (!reuseport_ok &&
+ reuseport && sk2->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ (sk2->sk_state == TCP_TIME_WAIT ||
+ uid_eq(uid, sock_i_uid(sk2))))) &&
+ inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ } else if (!reuseport_ok ||
+ !reuseport || !sk2->sk_reuseport ||
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(uid, sock_i_uid(sk2)))) {
if (inet_rcv_saddr_equal(sk, sk2, true))
break;
}
@@ -176,12 +180,14 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *
int port = 0;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
+ bool relax = false;
int i, low, high, attempt_half;
struct inet_bind_bucket *tb;
u32 remaining, offset;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
+ports_exhausted:
attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
inet_get_local_port_range(net, &low, &high);
@@ -219,7 +225,7 @@ other_parity_scan:
inet_bind_bucket_for_each(tb, &head->chain)
if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
tb->port == port) {
- if (!inet_csk_bind_conflict(sk, tb, false, false))
+ if (!inet_csk_bind_conflict(sk, tb, relax, false))
goto success;
goto next_port;
}
@@ -239,6 +245,12 @@ next_port:
attempt_half = 2;
goto other_half_scan;
}
+
+ if (net->ipv4.sysctl_ip_autobind_reuse && !relax) {
+ /* We still have a chance to connect to different destinations */
+ relax = true;
+ goto ports_exhausted;
+ }
return NULL;
success:
*port_ret = port;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8c8377568a78..5d50aad3cdbf 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -23,6 +23,7 @@
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/inet6_hashtables.h>
+#include <net/bpf_sk_storage.h>
#include <net/netlink.h>
#include <linux/inet.h>
@@ -170,26 +171,28 @@ errout:
}
EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);
+#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
- struct sk_buff *skb, const struct inet_diag_req_v2 *req,
- struct user_namespace *user_ns,
- u32 portid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh,
- bool net_admin)
+ struct sk_buff *skb, struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *req,
+ u16 nlmsg_flags, bool net_admin)
{
const struct tcp_congestion_ops *ca_ops;
const struct inet_diag_handler *handler;
+ struct inet_diag_dump_data *cb_data;
int ext = req->idiag_ext;
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
struct nlattr *attr;
void *info = NULL;
+ cb_data = cb->data;
handler = inet_diag_table[req->sdiag_protocol];
BUG_ON(!handler);
- nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
- nlmsg_flags);
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
if (!nlh)
return -EMSGSIZE;
@@ -201,7 +204,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
r->idiag_timer = 0;
r->idiag_retrans = 0;
- if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
+ if (inet_diag_msg_attrs_fill(sk, skb, r, ext,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+ net_admin))
goto errout;
if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
@@ -298,6 +303,48 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto errout;
}
+ /* Keep it at the end for potential retry with a larger skb,
+ * or else do best-effort fitting, which is only done for the
+ * first_nlmsg.
+ */
+ if (cb_data->bpf_stg_diag) {
+ bool first_nlmsg = ((unsigned char *)nlh == skb->data);
+ unsigned int prev_min_dump_alloc;
+ unsigned int total_nla_size = 0;
+ unsigned int msg_len;
+ int err;
+
+ msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh;
+ err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb,
+ INET_DIAG_SK_BPF_STORAGES,
+ &total_nla_size);
+
+ if (!err)
+ goto out;
+
+ total_nla_size += msg_len;
+ prev_min_dump_alloc = cb->min_dump_alloc;
+ if (total_nla_size > prev_min_dump_alloc)
+ cb->min_dump_alloc = min_t(u32, total_nla_size,
+ MAX_DUMP_ALLOC_SIZE);
+
+ if (!first_nlmsg)
+ goto errout;
+
+ if (cb->min_dump_alloc > prev_min_dump_alloc)
+ /* Retry with pskb_expand_head() with
+ * __GFP_DIRECT_RECLAIM
+ */
+ goto errout;
+
+ WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc);
+
+ /* Send what we have for this sk
+ * and move on to the next sk in the following
+ * dump()
+ */
+ }
+
out:
nlmsg_end(skb, nlh);
return 0;
@@ -308,30 +355,19 @@ errout:
}
EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
-static int inet_csk_diag_fill(struct sock *sk,
- struct sk_buff *skb,
- const struct inet_diag_req_v2 *req,
- struct user_namespace *user_ns,
- u32 portid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh,
- bool net_admin)
-{
- return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns,
- portid, seq, nlmsg_flags, unlh, net_admin);
-}
-
static int inet_twsk_diag_fill(struct sock *sk,
struct sk_buff *skb,
- u32 portid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh)
+ struct netlink_callback *cb,
+ u16 nlmsg_flags)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
long tmo;
- nlh = nlmsg_put(skb, portid, seq, unl