summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Makefile5
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/bpfilter/Makefile2
-rw-r--r--net/ipv4/bpfilter/sockopt.c43
-rw-r--r--net/ipv4/devinet.c15
-rw-r--r--net/ipv4/fib_frontend.c58
-rw-r--r--net/ipv4/fib_rules.c7
-rw-r--r--net/ipv4/fib_semantics.c45
-rw-r--r--net/ipv4/fib_trie.c14
-rw-r--r--net/ipv4/inet_connection_sock.c5
-rw-r--r--net/ipv4/ip_gre.c12
-rw-r--r--net/ipv4/ip_output.c45
-rw-r--r--net/ipv4/ip_sockglue.c17
-rw-r--r--net/ipv4/ip_tunnel_core.c6
-rw-r--r--net/ipv4/ipconfig.c150
-rw-r--r--net/ipv4/ipmr.c3
-rw-r--r--net/ipv4/ipmr_base.c8
-rw-r--r--net/ipv4/metrics.c55
-rw-r--r--net/ipv4/netfilter/Kconfig10
-rw-r--r--net/ipv4/netfilter/Makefile5
-rw-r--r--net/ipv4/netfilter/ip_tables.c7
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c2
-rw-r--r--net/ipv4/netfilter/iptable_nat.c88
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c255
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c143
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c2
-rw-r--r--net/ipv4/netfilter/nf_tproxy_ipv4.c147
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c53
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c2
-rw-r--r--net/ipv4/netlink.c23
-rw-r--r--net/ipv4/proc.c3
-rw-r--r--net/ipv4/route.c177
-rw-r--r--net/ipv4/sysctl_net_ipv4.c22
-rw-r--r--net/ipv4/tcp.c204
-rw-r--r--net/ipv4/tcp_input.c271
-rw-r--r--net/ipv4/tcp_ipv4.c55
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c112
-rw-r--r--net/ipv4/tcp_recovery.c80
-rw-r--r--net/ipv4/tcp_timer.c27
-rw-r--r--net/ipv4/udp.c120
-rw-r--r--net/ipv4/udp_offload.c101
46 files changed, 1637 insertions, 785 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index a07b7dd06def..eec9569ffa5c 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -13,7 +13,10 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_offload.o datagram.o raw.o udp.o udplite.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
- inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o
+ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
+ metrics.o netlink.o
+
+obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8a59428e63ab..15e125558c76 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -994,7 +994,9 @@ const struct proto_ops inet_stream_ops = {
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
.recvmsg = inet_recvmsg,
- .mmap = sock_no_mmap,
+#ifdef CONFIG_MMU
+ .mmap = tcp_mmap,
+#endif
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
@@ -1006,6 +1008,7 @@ const struct proto_ops inet_stream_ops = {
.compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
+ .set_rcvlowat = tcp_set_rcvlowat,
};
EXPORT_SYMBOL(inet_stream_ops);
diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile
new file mode 100644
index 000000000000..ce262d76cc48
--- /dev/null
+++ b/net/ipv4/bpfilter/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_BPFILTER) += sockopt.o
+
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
new file mode 100644
index 000000000000..5e04ed25bc0e
--- /dev/null
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/uaccess.h>
+#include <linux/bpfilter.h>
+#include <uapi/linux/bpf.h>
+#include <linux/wait.h>
+#include <linux/kmod.h>
+
+int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
+ char __user *optval,
+ unsigned int optlen, bool is_set);
+EXPORT_SYMBOL_GPL(bpfilter_process_sockopt);
+
+static int bpfilter_mbox_request(struct sock *sk, int optname,
+ char __user *optval,
+ unsigned int optlen, bool is_set)
+{
+ if (!bpfilter_process_sockopt) {
+ int err = request_module("bpfilter");
+
+ if (err)
+ return err;
+ if (!bpfilter_process_sockopt)
+ return -ECHILD;
+ }
+ return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set);
+}
+
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
+ unsigned int optlen)
+{
+ return bpfilter_mbox_request(sk, optname, optval, optlen, true);
+}
+
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
+ int __user *optlen)
+{
+ int len;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ return bpfilter_mbox_request(sk, optname, optval, len, false);
+}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 40f001782c1b..d7585ab1a77a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -99,6 +99,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
[IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
[IFA_FLAGS] = { .type = NLA_U32 },
+ [IFA_RT_PRIORITY] = { .type = NLA_U32 },
};
#define IN4_ADDR_HSIZE_SHIFT 8
@@ -835,6 +836,9 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+ if (tb[IFA_RT_PRIORITY])
+ ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
+
if (tb[IFA_CACHEINFO]) {
struct ifa_cacheinfo *ci;
@@ -906,12 +910,20 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
extack);
} else {
+ u32 new_metric = ifa->ifa_rt_priority;
+
inet_free_ifa(ifa);
if (nlh->nlmsg_flags & NLM_F_EXCL ||
!(nlh->nlmsg_flags & NLM_F_REPLACE))
return -EEXIST;
ifa = ifa_existing;
+
+ if (ifa->ifa_rt_priority != new_metric) {
+ fib_modify_prefix_metric(ifa, new_metric);
+ ifa->ifa_rt_priority = new_metric;
+ }
+
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
cancel_delayed_work(&check_lifetime_work);
queue_delayed_work(system_power_efficient_wq,
@@ -1549,6 +1561,7 @@ static size_t inet_nlmsg_size(void)
+ nla_total_size(4) /* IFA_BROADCAST */
+ nla_total_size(IFNAMSIZ) /* IFA_LABEL */
+ nla_total_size(4) /* IFA_FLAGS */
+ + nla_total_size(4) /* IFA_RT_PRIORITY */
+ nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
}
@@ -1618,6 +1631,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
(ifa->ifa_label[0] &&
nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
+ (ifa->ifa_rt_priority &&
+ nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
preferred, valid))
goto nla_put_failure;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e66172aaf241..63aa39b3af03 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -354,8 +354,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fl4.fl4_dport = 0;
}
- trace_fib_validate_source(dev, &fl4);
-
if (fib_lookup(net, &fl4, &res, 0))
goto last_resort;
if (res.type != RTN_UNICAST &&
@@ -650,6 +648,9 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_UID] = { .type = NLA_U32 },
[RTA_MARK] = { .type = NLA_U32 },
[RTA_TABLE] = { .type = NLA_U32 },
+ [RTA_IP_PROTO] = { .type = NLA_U8 },
+ [RTA_SPORT] = { .type = NLA_U16 },
+ [RTA_DPORT] = { .type = NLA_U16 },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -846,7 +847,8 @@ out_err:
* to fib engine. It is legal, because all events occur
* only when netlink is already locked.
*/
-static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
+static void fib_magic(int cmd, int type, __be32 dst, int dst_len,
+ struct in_ifaddr *ifa, u32 rt_priority)
{
struct net *net = dev_net(ifa->ifa_dev->dev);
u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
@@ -856,6 +858,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
.fc_type = type,
.fc_dst = dst,
.fc_dst_len = dst_len,
+ .fc_priority = rt_priority,
.fc_prefsrc = ifa->ifa_local,
.fc_oif = ifa->ifa_dev->dev->ifindex,
.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
@@ -901,31 +904,57 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
}
}
- fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim, 0);
if (!(dev->flags & IFF_UP))
return;
/* Add broadcast address, if it is explicitly assigned. */
if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
- fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
+ prim, 0);
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
fib_magic(RTM_NEWROUTE,
dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- prefix, ifa->ifa_prefixlen, prim);
+ prefix, ifa->ifa_prefixlen, prim,
+ ifa->ifa_rt_priority);
/* Add network specific broadcasts, when it takes a sense */
if (ifa->ifa_prefixlen < 31) {
- fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32,
+ prim, 0);
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
- 32, prim);
+ 32, prim, 0);
}
}
}
+void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric)
+{
+ __be32 prefix = ifa->ifa_address & ifa->ifa_mask;
+ struct in_device *in_dev = ifa->ifa_dev;
+ struct net_device *dev = in_dev->dev;
+
+ if (!(dev->flags & IFF_UP) ||
+ ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
+ ipv4_is_zeronet(prefix) ||
+ prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32)
+ return;
+
+ /* add the new */
+ fib_magic(RTM_NEWROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ prefix, ifa->ifa_prefixlen, ifa, new_metric);
+
+ /* delete the old */
+ fib_magic(RTM_DELROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ prefix, ifa->ifa_prefixlen, ifa, ifa->ifa_rt_priority);
+}
+
/* Delete primary or secondary address.
* Optionally, on secondary address promotion consider the addresses
* from subnet iprim as deleted, even if they are in device list.
@@ -967,7 +996,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
fib_magic(RTM_DELROUTE,
dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- any, ifa->ifa_prefixlen, prim);
+ any, ifa->ifa_prefixlen, prim, 0);
subnet = 1;
}
@@ -1051,17 +1080,20 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
no_promotions:
if (!(ok & BRD_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
+ prim, 0);
if (subnet && ifa->ifa_prefixlen < 31) {
if (!(ok & BRD1_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32,
+ prim, 0);
if (!(ok & BRD0_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32,
+ prim, 0);
}
if (!(ok & LOCAL_OK)) {
unsigned int addr_type;
- fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim, 0);
/* Check, that this local address finally disappeared. */
addr_type = inet_addr_type_dev_table(dev_net(dev), dev,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 737d11bc8838..f8eb78d042a4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -213,14 +213,17 @@ static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
- struct nlattr **tb)
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
int err = -EINVAL;
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
- if (frh->tos & ~IPTOS_TOS_MASK)
+ if (frh->tos & ~IPTOS_TOS_MASK) {
+ NL_SET_ERR_MSG(extack, "Invalid tos");
goto errout;
+ }
/* split local/main if they are not already split */
err = fib_unmerge(net);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c27122f01b87..f3c89ccf14c5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -717,6 +717,8 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
nla_strlcpy(tmp, nla, sizeof(tmp));
val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
} else {
+ if (nla_len(nla) != sizeof(u32))
+ return false;
val = nla_get_u32(nla);
}
@@ -1019,47 +1021,8 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
static int
fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
{
- bool ecn_ca = false;
- struct nlattr *nla;
- int remaining;
-
- if (!cfg->fc_mx)
- return 0;
-
- nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
- int type = nla_type(nla);
- u32 val;
-
- if (!type)
- continue;
- if (type > RTAX_MAX)
- return -EINVAL;
-
- if (type == RTAX_CC_ALGO) {
- char tmp[TCP_CA_NAME_MAX];
-
- nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
- if (val == TCP_CA_UNSPEC)
- return -EINVAL;
- } else {
- val = nla_get_u32(nla);
- }
- if (type == RTAX_ADVMSS && val > 65535 - 40)
- val = 65535 - 40;
- if (type == RTAX_MTU && val > 65535 - 15)
- val = 65535 - 15;
- if (type == RTAX_HOPLIMIT && val > 255)
- val = 255;
- if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
- return -EINVAL;
- fi->fib_metrics->metrics[type - 1] = val;
- }
-
- if (ecn_ca)
- fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
-
- return 0;
+ return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len,
+ fi->fib_metrics->metrics);
}
struct fib_info *fib_create_info(struct fib_config *cfg,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 99c23a0cb8ca..5bc0c89e81e4 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1326,14 +1326,14 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
unsigned long index;
t_key cindex;
- trace_fib_table_lookup(tb->tb_id, flp);
-
pn = t->kv;
cindex = 0;
n = get_child_rcu(pn, cindex);
- if (!n)
+ if (!n) {
+ trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN);
return -EAGAIN;
+ }
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->gets);
@@ -1416,8 +1416,11 @@ backtrace:
* nothing for us to do as we do not have any
* further nodes to parse.
*/
- if (IS_TRIE(pn))
+ if (IS_TRIE(pn)) {
+ trace_fib_table_lookup(tb->tb_id, flp,
+ NULL, -EAGAIN);
return -EAGAIN;
+ }
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->backtrack);
#endif
@@ -1459,6 +1462,7 @@ found:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
+ trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
return err;
}
if (fi->fib_flags & RTNH_F_DEAD)
@@ -1494,7 +1498,7 @@ found:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
- trace_fib_table_lookup_nh(nh);
+ trace_fib_table_lookup(tb->tb_id, flp, nh, err);
return err;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 881ac6d046f2..33a88e045efd 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -27,11 +27,6 @@
#include <net/sock_reuseport.h>
#include <net/addrconf.h>
-#ifdef INET_CSK_DEBUG
-const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
-EXPORT_SYMBOL(inet_csk_timer_bug_msg);
-#endif
-
#if IS_ENABLED(CONFIG_IPV6)
/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
* only, and any IPv4 addresses if not IPv6 only
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f200b304f76c..2d8efeecf619 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -578,6 +578,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
int tunnel_hlen;
int version;
__be16 df;
+ int nhoff;
+ int thoff;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -605,6 +607,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
truncate = true;
}
+ nhoff = skb_network_header(skb) - skb_mac_header(skb);
+ if (skb->protocol == htons(ETH_P_IP) &&
+ (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
+ truncate = true;
+
+ thoff = skb_transport_header(skb) - skb_mac_header(skb);
+ if (skb->protocol == htons(ETH_P_IPV6) &&
+ (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
+ truncate = true;
+
if (version == 1) {
erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
ntohl(md->u.index), truncate, true);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d54abc097800..af5a830ff6ad 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -878,11 +878,14 @@ static int __ip_append_data(struct sock *sk,
struct rtable *rt = (struct rtable *)cork->dst;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
+ bool paged;
skb = skb_peek_tail(queue);
exthdrlen = !skb ? rt->dst.header_len : 0;
- mtu = cork->fragsize;
+ mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
+ paged = !!cork->gso_size;
+
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
tskey = sk->sk_tskey++;
@@ -906,8 +909,8 @@ static int __ip_append_data(struct sock *sk,
if (transhdrlen &&
length + fragheaderlen <= mtu &&
rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) &&
- !(flags & MSG_MORE) &&
- !exthdrlen)
+ (!(flags & MSG_MORE) || cork->gso_size) &&
+ (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM)))
csummode = CHECKSUM_PARTIAL;
cork->length += length;
@@ -933,6 +936,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
+ unsigned int pagedlen = 0;
struct sk_buff *skb_prev;
alloc_new_skb:
skb_prev = skb;
@@ -953,8 +957,12 @@ alloc_new_skb:
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else
+ else if (!paged)
alloclen = fraglen;
+ else {
+ alloclen = min_t(int, fraglen, MAX_HEADER);
+ pagedlen = fraglen - alloclen;
+ }
alloclen += exthdrlen;
@@ -998,7 +1006,7 @@ alloc_new_skb:
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fraglen + exthdrlen);
+ data = skb_put(skb, fraglen + exthdrlen - pagedlen);
skb_set_network_header(skb, exthdrlen);
skb->transport_header = (skb->network_header +
fragheaderlen);
@@ -1014,7 +1022,7 @@ alloc_new_skb:
pskb_trim_unique(skb_prev, maxfraglen);
}
- copy = datalen - transhdrlen - fraggap;
+ copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
@@ -1022,7 +1030,7 @@ alloc_new_skb:
}
offset += copy;
- length -= datalen - fraggap;
+ length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
csummode = CHECKSUM_NONE;
@@ -1136,6 +1144,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
*rtp = NULL;
cork->fragsize = ip_sk_use_pmtu(sk) ?
dst_mtu(&rt->dst) : rt->dst.dev->mtu;
+
+ cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0;
cork->dst = &rt->dst;
cork->length = 0;
cork->ttl = ipc->ttl;
@@ -1215,7 +1225,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
return -EOPNOTSUPP;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
- mtu = cork->fragsize;
+ mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
@@ -1471,9 +1481,8 @@ struct sk_buff *ip_make_skb(struct sock *sk,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
struct ipcm_cookie *ipc, struct rtable **rtp,
- unsigned int flags)
+ struct inet_cork *cork, unsigned int flags)
{
- struct inet_cork cork;
struct sk_buff_head queue;
int err;
@@ -1482,22 +1491,22 @@ struct sk_buff *ip_make_skb(struct sock *sk,
__skb_queue_head_init(&queue);
- cork.flags = 0;
- cork.addr = 0;
- cork.opt = NULL;
- err = ip_setup_cork(sk, &cork, ipc, rtp);
+ cork->flags = 0;
+ cork->addr = 0;
+ cork->opt = NULL;
+ err = ip_setup_cork(sk, cork, ipc, rtp);
if (err)
return ERR_PTR(err);
- err = __ip_append_data(sk, fl4, &queue, &cork,
+ err = __ip_append_data(sk, fl4, &queue, cork,
&current->task_frag, getfrag,
from, length, transhdrlen, flags);
if (err) {
- __ip_flush_pending_frames(sk, &queue, &cork);
+ __ip_flush_pending_frames(sk, &queue, cork);
return ERR_PTR(err);
}
- return __ip_make_skb(sk, fl4, &queue, &cork);
+ return __ip_make_skb(sk, fl4, &queue, cork);
}
/*
@@ -1553,7 +1562,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
oif = skb->skb_iif;
flowi4_init_output(&fl4, oif,
- IP4_REPLY_MARK(net, skb->mark),
+ IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark,
RT_TOS(arg->tos),
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg),
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 57bbb060faaf..fc32fdbeefa6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -47,6 +47,8 @@
#include <linux/errqueue.h>
#include <linux/uaccess.h>
+#include <linux/bpfilter.h>
+
/*
* SOL_IP control messages.
*/
@@ -1242,6 +1244,11 @@ int ip_setsockopt(struct sock *sk, int level,
return -ENOPROTOOPT;
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_BPFILTER
+ if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
+ optname < BPFILTER_IPT_SET_MAX)
+ err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
+#endif
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
@@ -1550,6 +1557,11 @@ int ip_getsockopt(struct sock *sk, int level,
int err;
err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
+#ifdef CONFIG_BPFILTER
+ if (optname >= BPFILTER_IPT_SO_GET_INFO &&
+ optname < BPFILTER_IPT_GET_MAX)
+ err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
+#endif
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
@@ -1582,6 +1594,11 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
err = do_ip_getsockopt(sk, level, optname, optval, optlen,
MSG_CMSG_COMPAT);
+#ifdef CONFIG_BPFILTER
+ if (optname >= BPFILTER_IPT_SO_GET_INFO &&
+ optname < BPFILTER_IPT_GET_MAX)
+ err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
+#endif
#ifdef CONFIG_NETF