summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-06 18:39:49 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-06 18:39:49 -0700
commit1c8c5a9d38f607c0b6fd12c91cbe1a4418762a21 (patch)
treedcc97181d4d187252e0cc8fdf29d9b365fa3ffd0 /net/ipv4
parent285767604576148fc1be7fcd112e4a90eb0d6ad2 (diff)
parent7170e6045a6a8b33f4fa5753589dc77b16198e2d (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Add Maglev hashing scheduler to IPVS, from Inju Song. 2) Lots of new TC subsystem tests from Roman Mashak. 3) Add TCP zero copy receive and fix delayed acks and autotuning with SO_RCVLOWAT, from Eric Dumazet. 4) Add XDP_REDIRECT support to mlx5 driver, from Jesper Dangaard Brouer. 5) Add ttl inherit support to vxlan, from Hangbin Liu. 6) Properly separate ipv6 routes into their logically independant components. fib6_info for the routing table, and fib6_nh for sets of nexthops, which thus can be shared. From David Ahern. 7) Add bpf_xdp_adjust_tail helper, which can be used to generate ICMP messages from XDP programs. From Nikita V. Shirokov. 8) Lots of long overdue cleanups to the r8169 driver, from Heiner Kallweit. 9) Add BTF ("BPF Type Format"), from Martin KaFai Lau. 10) Add traffic condition monitoring to iwlwifi, from Luca Coelho. 11) Plumb extack down into fib_rules, from Roopa Prabhu. 12) Add Flower classifier offload support to igb, from Vinicius Costa Gomes. 13) Add UDP GSO support, from Willem de Bruijn. 14) Add documentation for eBPF helpers, from Quentin Monnet. 15) Add TLS tx offload to mlx5, from Ilya Lesokhin. 16) Allow applications to be given the number of bytes available to read on a socket via a control message returned from recvmsg(), from Soheil Hassas Yeganeh. 17) Add x86_32 eBPF JIT compiler, from Wang YanQing. 18) Add AF_XDP sockets, with zerocopy support infrastructure as well. From Björn Töpel. 19) Remove indirect load support from all of the BPF JITs and handle these operations in the verifier by translating them into native BPF instead. From Daniel Borkmann. 20) Add GRO support to ipv6 gre tunnels, from Eran Ben Elisha. 21) Allow XDP programs to do lookups in the main kernel routing tables for forwarding. From David Ahern. 22) Allow drivers to store hardware state into an ELF section of kernel dump vmcore files, and use it in cxgb4. From Rahul Lakkireddy. 23) Various RACK and loss detection improvements in TCP, from Yuchung Cheng. 24) Add TCP SACK compression, from Eric Dumazet. 25) Add User Mode Helper support and basic bpfilter infrastructure, from Alexei Starovoitov. 26) Support ports and protocol values in RTM_GETROUTE, from Roopa Prabhu. 27) Support bulking in ->ndo_xdp_xmit() API, from Jesper Dangaard Brouer. 28) Add lots of forwarding selftests, from Petr Machata. 29) Add generic network device failover driver, from Sridhar Samudrala. * ra.kernel.org:/pub/scm/linux/kernel/git/davem/net-next: (1959 commits) strparser: Add __strp_unpause and use it in ktls. rxrpc: Fix terminal retransmission connection ID to include the channel net: hns3: Optimize PF CMDQ interrupt switching process net: hns3: Fix for VF mailbox receiving unknown message net: hns3: Fix for VF mailbox cannot receiving PF response bnx2x: use the right constant Revert "net: sched: cls: Fix offloading when ingress dev is vxlan" net: dsa: b53: Fix for brcm tag issue in Cygnus SoC enic: fix UDP rss bits netdev-FAQ: clarify DaveM's position for stable backports rtnetlink: validate attributes in do_setlink() mlxsw: Add extack messages for port_{un, }split failures netdevsim: Add extack error message for devlink reload devlink: Add extack to reload and port_{un, }split operations net: metrics: add proper netlink validation ipmr: fix error path when ipmr_new_table fails ip6mr: only set ip6mr_table from setsockopt when ip6mr_new_table succeeds net: hns3: remove unused hclgevf_cfg_func_mta_filter netfilter: provide udp*_lib_lookup for nf_tproxy qed*: Utilize FW 8.37.2.0 ...
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Makefile5
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/bpfilter/Makefile2
-rw-r--r--net/ipv4/bpfilter/sockopt.c43
-rw-r--r--net/ipv4/devinet.c15
-rw-r--r--net/ipv4/fib_frontend.c58
-rw-r--r--net/ipv4/fib_rules.c7
-rw-r--r--net/ipv4/fib_semantics.c45
-rw-r--r--net/ipv4/fib_trie.c14
-rw-r--r--net/ipv4/inet_connection_sock.c5
-rw-r--r--net/ipv4/ip_gre.c12
-rw-r--r--net/ipv4/ip_output.c45
-rw-r--r--net/ipv4/ip_sockglue.c17
-rw-r--r--net/ipv4/ip_tunnel_core.c6
-rw-r--r--net/ipv4/ipconfig.c150
-rw-r--r--net/ipv4/ipmr.c3
-rw-r--r--net/ipv4/ipmr_base.c8
-rw-r--r--net/ipv4/metrics.c55
-rw-r--r--net/ipv4/netfilter/Kconfig10
-rw-r--r--net/ipv4/netfilter/Makefile5
-rw-r--r--net/ipv4/netfilter/ip_tables.c7
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c2
-rw-r--r--net/ipv4/netfilter/iptable_nat.c88
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c255
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c143
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c2
-rw-r--r--net/ipv4/netfilter/nf_tproxy_ipv4.c147
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c53
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c2
-rw-r--r--net/ipv4/netlink.c23
-rw-r--r--net/ipv4/proc.c3
-rw-r--r--net/ipv4/route.c177
-rw-r--r--net/ipv4/sysctl_net_ipv4.c22
-rw-r--r--net/ipv4/tcp.c204
-rw-r--r--net/ipv4/tcp_input.c271
-rw-r--r--net/ipv4/tcp_ipv4.c55
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c112
-rw-r--r--net/ipv4/tcp_recovery.c80
-rw-r--r--net/ipv4/tcp_timer.c27
-rw-r--r--net/ipv4/udp.c120
-rw-r--r--net/ipv4/udp_offload.c101
46 files changed, 1637 insertions, 785 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index a07b7dd06def..eec9569ffa5c 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -13,7 +13,10 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_offload.o datagram.o raw.o udp.o udplite.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
- inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o
+ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
+ metrics.o netlink.o
+
+obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8a59428e63ab..15e125558c76 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -994,7 +994,9 @@ const struct proto_ops inet_stream_ops = {
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
.recvmsg = inet_recvmsg,
- .mmap = sock_no_mmap,
+#ifdef CONFIG_MMU
+ .mmap = tcp_mmap,
+#endif
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
@@ -1006,6 +1008,7 @@ const struct proto_ops inet_stream_ops = {
.compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
+ .set_rcvlowat = tcp_set_rcvlowat,
};
EXPORT_SYMBOL(inet_stream_ops);
diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile
new file mode 100644
index 000000000000..ce262d76cc48
--- /dev/null
+++ b/net/ipv4/bpfilter/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_BPFILTER) += sockopt.o
+
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
new file mode 100644
index 000000000000..5e04ed25bc0e
--- /dev/null
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/uaccess.h>
+#include <linux/bpfilter.h>
+#include <uapi/linux/bpf.h>
+#include <linux/wait.h>
+#include <linux/kmod.h>
+
+int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
+ char __user *optval,
+ unsigned int optlen, bool is_set);
+EXPORT_SYMBOL_GPL(bpfilter_process_sockopt);
+
+static int bpfilter_mbox_request(struct sock *sk, int optname,
+ char __user *optval,
+ unsigned int optlen, bool is_set)
+{
+ if (!bpfilter_process_sockopt) {
+ int err = request_module("bpfilter");
+
+ if (err)
+ return err;
+ if (!bpfilter_process_sockopt)
+ return -ECHILD;
+ }
+ return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set);
+}
+
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
+ unsigned int optlen)
+{
+ return bpfilter_mbox_request(sk, optname, optval, optlen, true);
+}
+
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
+ int __user *optlen)
+{
+ int len;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ return bpfilter_mbox_request(sk, optname, optval, len, false);
+}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 40f001782c1b..d7585ab1a77a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -99,6 +99,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
[IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
[IFA_FLAGS] = { .type = NLA_U32 },
+ [IFA_RT_PRIORITY] = { .type = NLA_U32 },
};
#define IN4_ADDR_HSIZE_SHIFT 8
@@ -835,6 +836,9 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+ if (tb[IFA_RT_PRIORITY])
+ ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
+
if (tb[IFA_CACHEINFO]) {
struct ifa_cacheinfo *ci;
@@ -906,12 +910,20 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
extack);
} else {
+ u32 new_metric = ifa->ifa_rt_priority;
+
inet_free_ifa(ifa);
if (nlh->nlmsg_flags & NLM_F_EXCL ||
!(nlh->nlmsg_flags & NLM_F_REPLACE))
return -EEXIST;
ifa = ifa_existing;
+
+ if (ifa->ifa_rt_priority != new_metric) {
+ fib_modify_prefix_metric(ifa, new_metric);
+ ifa->ifa_rt_priority = new_metric;
+ }
+
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
cancel_delayed_work(&check_lifetime_work);
queue_delayed_work(system_power_efficient_wq,
@@ -1549,6 +1561,7 @@ static size_t inet_nlmsg_size(void)
+ nla_total_size(4) /* IFA_BROADCAST */
+ nla_total_size(IFNAMSIZ) /* IFA_LABEL */
+ nla_total_size(4) /* IFA_FLAGS */
+ + nla_total_size(4) /* IFA_RT_PRIORITY */
+ nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
}
@@ -1618,6 +1631,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
(ifa->ifa_label[0] &&
nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
+ (ifa->ifa_rt_priority &&
+ nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
preferred, valid))
goto nla_put_failure;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e66172aaf241..63aa39b3af03 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -354,8 +354,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fl4.fl4_dport = 0;
}
- trace_fib_validate_source(dev, &fl4);
-
if (fib_lookup(net, &fl4, &res, 0))
goto last_resort;
if (res.type != RTN_UNICAST &&
@@ -650,6 +648,9 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_UID] = { .type = NLA_U32 },
[RTA_MARK] = { .type = NLA_U32 },
[RTA_TABLE] = { .type = NLA_U32 },
+ [RTA_IP_PROTO] = { .type = NLA_U8 },
+ [RTA_SPORT] = { .type = NLA_U16 },
+ [RTA_DPORT] = { .type = NLA_U16 },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -846,7 +847,8 @@ out_err:
* to fib engine. It is legal, because all events occur
* only when netlink is already locked.
*/
-static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
+static void fib_magic(int cmd, int type, __be32 dst, int dst_len,
+ struct in_ifaddr *ifa, u32 rt_priority)
{
struct net *net = dev_net(ifa->ifa_dev->dev);
u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
@@ -856,6 +858,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
.fc_type = type,
.fc_dst = dst,
.fc_dst_len = dst_len,
+ .fc_priority = rt_priority,
.fc_prefsrc = ifa->ifa_local,
.fc_oif = ifa->ifa_dev->dev->ifindex,
.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
@@ -901,31 +904,57 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
}
}
- fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim, 0);
if (!(dev->flags & IFF_UP))
return;
/* Add broadcast address, if it is explicitly assigned. */
if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
- fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
+ prim, 0);
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
fib_magic(RTM_NEWROUTE,
dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- prefix, ifa->ifa_prefixlen, prim);
+ prefix, ifa->ifa_prefixlen, prim,
+ ifa->ifa_rt_priority);
/* Add network specific broadcasts, when it takes a sense */
if (ifa->ifa_prefixlen < 31) {
- fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32,
+ prim, 0);
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
- 32, prim);
+ 32, prim, 0);
}
}
}
+void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric)
+{
+ __be32 prefix = ifa->ifa_address & ifa->ifa_mask;
+ struct in_device *in_dev = ifa->ifa_dev;
+ struct net_device *dev = in_dev->dev;
+
+ if (!(dev->flags & IFF_UP) ||
+ ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
+ ipv4_is_zeronet(prefix) ||
+ prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32)
+ return;
+
+ /* add the new */
+ fib_magic(RTM_NEWROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ prefix, ifa->ifa_prefixlen, ifa, new_metric);
+
+ /* delete the old */
+ fib_magic(RTM_DELROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ prefix, ifa->ifa_prefixlen, ifa, ifa->ifa_rt_priority);
+}
+
/* Delete primary or secondary address.
* Optionally, on secondary address promotion consider the addresses
* from subnet iprim as deleted, even if they are in device list.
@@ -967,7 +996,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
fib_magic(RTM_DELROUTE,
dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- any, ifa->ifa_prefixlen, prim);
+ any, ifa->ifa_prefixlen, prim, 0);
subnet = 1;
}
@@ -1051,17 +1080,20 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
no_promotions:
if (!(ok & BRD_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
+ prim, 0);
if (subnet && ifa->ifa_prefixlen < 31) {
if (!(ok & BRD1_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32,
+ prim, 0);
if (!(ok & BRD0_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32,
+ prim, 0);
}
if (!(ok & LOCAL_OK)) {
unsigned int addr_type;
- fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim, 0);
/* Check, that this local address finally disappeared. */
addr_type = inet_addr_type_dev_table(dev_net(dev), dev,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 737d11bc8838..f8eb78d042a4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -213,14 +213,17 @@ static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
- struct nlattr **tb)
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
int err = -EINVAL;
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
- if (frh->tos & ~IPTOS_TOS_MASK)
+ if (frh->tos & ~IPTOS_TOS_MASK) {
+ NL_SET_ERR_MSG(extack, "Invalid tos");
goto errout;
+ }
/* split local/main if they are not already split */
err = fib_unmerge(net);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c27122f01b87..f3c89ccf14c5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -717,6 +717,8 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
nla_strlcpy(tmp, nla, sizeof(tmp));
val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
} else {
+ if (nla_len(nla) != sizeof(u32))
+ return false;
val = nla_get_u32(nla);
}
@@ -1019,47 +1021,8 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
static int
fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
{
- bool ecn_ca = false;
- struct nlattr *nla;
- int remaining;
-
- if (!cfg->fc_mx)
- return 0;
-
- nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
- int type = nla_type(nla);
- u32 val;
-
- if (!type)
- continue;
- if (type > RTAX_MAX)
- return -EINVAL;
-
- if (type == RTAX_CC_ALGO) {
- char tmp[TCP_CA_NAME_MAX];
-
- nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
- if (val == TCP_CA_UNSPEC)
- return -EINVAL;
- } else {
- val = nla_get_u32(nla);
- }
- if (type == RTAX_ADVMSS && val > 65535 - 40)
- val = 65535 - 40;
- if (type == RTAX_MTU && val > 65535 - 15)
- val = 65535 - 15;
- if (type == RTAX_HOPLIMIT && val > 255)
- val = 255;
- if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
- return -EINVAL;
- fi->fib_metrics->metrics[type - 1] = val;
- }
-
- if (ecn_ca)
- fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
-
- return 0;
+ return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len,
+ fi->fib_metrics->metrics);
}
struct fib_info *fib_create_info(struct fib_config *cfg,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 99c23a0cb8ca..5bc0c89e81e4 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1326,14 +1326,14 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
unsigned long index;
t_key cindex;
- trace_fib_table_lookup(tb->tb_id, flp);
-
pn = t->kv;
cindex = 0;
n = get_child_rcu(pn, cindex);
- if (!n)
+ if (!n) {
+ trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN);
return -EAGAIN;
+ }
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->gets);
@@ -1416,8 +1416,11 @@ backtrace:
* nothing for us to do as we do not have any
* further nodes to parse.
*/
- if (IS_TRIE(pn))
+ if (IS_TRIE(pn)) {
+ trace_fib_table_lookup(tb->tb_id, flp,
+ NULL, -EAGAIN);
return -EAGAIN;
+ }
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->backtrack);
#endif
@@ -1459,6 +1462,7 @@ found:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
+ trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
return err;
}
if (fi->fib_flags & RTNH_F_DEAD)
@@ -1494,7 +1498,7 @@ found:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
- trace_fib_table_lookup_nh(nh);
+ trace_fib_table_lookup(tb->tb_id, flp, nh, err);
return err;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 881ac6d046f2..33a88e045efd 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -27,11 +27,6 @@
#include <net/sock_reuseport.h>
#include <net/addrconf.h>
-#ifdef INET_CSK_DEBUG
-const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
-EXPORT_SYMBOL(inet_csk_timer_bug_msg);
-#endif
-
#if IS_ENABLED(CONFIG_IPV6)
/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
* only, and any IPv4 addresses if not IPv6 only
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f200b304f76c..2d8efeecf619 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -578,6 +578,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
int tunnel_hlen;
int version;
__be16 df;
+ int nhoff;
+ int thoff;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -605,6 +607,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
truncate = true;
}
+ nhoff = skb_network_header(skb) - skb_mac_header(skb);
+ if (skb->protocol == htons(ETH_P_IP) &&
+ (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
+ truncate = true;
+
+ thoff = skb_transport_header(skb) - skb_mac_header(skb);
+ if (skb->protocol == htons(ETH_P_IPV6) &&
+ (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
+ truncate = true;
+
if (version == 1) {
erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
ntohl(md->u.index), truncate, true);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d54abc097800..af5a830ff6ad 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -878,11 +878,14 @@ static int __ip_append_data(struct sock *sk,
struct rtable *rt = (struct rtable *)cork->dst;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
+ bool paged;
skb = skb_peek_tail(queue);
exthdrlen = !skb ? rt->dst.header_len : 0;
- mtu = cork->fragsize;
+ mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
+ paged = !!cork->gso_size;
+
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
tskey = sk->sk_tskey++;
@@ -906,8 +909,8 @@ static int __ip_append_data(struct sock *sk,
if (transhdrlen &&
length + fragheaderlen <= mtu &&
rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) &&
- !(flags & MSG_MORE) &&
- !exthdrlen)
+ (!(flags & MSG_MORE) || cork->gso_size) &&
+ (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM)))
csummode = CHECKSUM_PARTIAL;
cork->length += length;
@@ -933,6 +936,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
+ unsigned int pagedlen = 0;
struct sk_buff *skb_prev;
alloc_new_skb:
skb_prev = skb;
@@ -953,8 +957,12 @@ alloc_new_skb:
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else
+ else if (!paged)
alloclen = fraglen;
+ else {
+ alloclen = min_t(int, fraglen, MAX_HEADER);
+ pagedlen = fraglen - alloclen;
+ }
alloclen += exthdrlen;
@@ -998,7 +1006,7 @@ alloc_new_skb:
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fraglen + exthdrlen);
+ data = skb_put(skb, fraglen + exthdrlen - pagedlen);
skb_set_network_header(skb, exthdrlen);
skb->transport_header = (skb->network_header +
fragheaderlen);
@@ -1014,7 +1022,7 @@ alloc_new_skb:
pskb_trim_unique(skb_prev, maxfraglen);
}
- copy = datalen - transhdrlen - fraggap;
+ copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
@@ -1022,7 +1030,7 @@ alloc_new_skb:
}
offset += copy;
- length -= datalen - fraggap;
+ length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
csummode = CHECKSUM_NONE;
@@ -1136,6 +1144,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
*rtp = NULL;
cork->fragsize = ip_sk_use_pmtu(sk) ?
dst_mtu(&rt->dst) : rt->dst.dev->mtu;
+
+ cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0;
cork->dst = &rt->dst;
cork->length = 0;
cork->ttl = ipc->ttl;
@@ -1215,7 +1225,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
return -EOPNOTSUPP;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
- mtu = cork->fragsize;
+ mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
@@ -1471,9 +1481,8 @@ struct sk_buff *ip_make_skb(struct sock *sk,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
struct ipcm_cookie *ipc, struct rtable **rtp,
- unsigned int flags)
+ struct inet_cork *cork, unsigned int flags)
{
- struct inet_cork cork;
struct sk_buff_head queue;
int err;
@@ -1482,22 +1491,22 @@ struct sk_buff *ip_make_skb(struct sock *sk,
__skb_queue_head_init(&queue);
- cork.flags = 0;
- cork.addr = 0;
- cork.opt = NULL;
- err = ip_setup_cork(sk, &cork, ipc, rtp);
+ cork->flags = 0;
+ cork->addr = 0;
+ cork->opt = NULL;
+ err = ip_setup_cork(sk, cork, ipc, rtp);
if (err)
return ERR_PTR(err);
- err = __ip_append_data(sk, fl4, &queue, &cork,
+ err = __ip_append_data(sk, fl4, &queue, cork,
&current->task_frag, getfrag,
from, length, transhdrlen, flags);
if (err) {
- __ip_flush_pending_frames(sk, &queue, &cork);
+ __ip_flush_pending_frames(sk, &queue, cork);
return ERR_PTR(err);
}
- return __ip_make_skb(sk, fl4, &queue, &cork);
+ return __ip_make_skb(sk, fl4, &queue, cork);
}
/*
@@ -1553,7 +1562,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
oif = skb->skb_iif;
flowi4_init_output(&fl4, oif,
- IP4_REPLY_MARK(net, skb->mark),
+ IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark,
RT_TOS(arg->tos),
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg),
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 57bbb060faaf..fc32fdbeefa6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/