summary | refs | log | tree | commit | diff | stats
path: root/net/ipv6
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2017-09-06 14:45:08 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-09-06 14:45:08 -0700
commit aae3dbb4776e7916b6cd442d00159bea27a695c1 (patch)
tree d074c5d783a81e7e2e084b1eba77f57459da7e37 /net/ipv6
parent ec3604c7a5aae8953545b0d05495357009a960e5 (diff)
parent 66bed8465a808400eb14562510e26c8818082cb8 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) Support ipv6 checksum offload in sunvnet driver, from Shannon Nelson.
 2) Move to RB-tree instead of custom AVL code in inetpeer, from Eric Dumazet.
 3) Allow generic XDP to work on virtual devices, from John Fastabend.
 4) Add bpf device maps and XDP_REDIRECT, which can be used to build arbitrary switching frameworks using XDP. From John Fastabend.
 5) Remove UFO offloads from the tree, gave us little other than bugs.
 6) Remove the IPSEC flow cache, from Florian Westphal.
 7) Support ipv6 route offload in mlxsw driver.
 8) Support VF representors in bnxt_en, from Sathya Perla.
 9) Add support for forward error correction modes to ethtool, from Vidya Sagar Ravipati.
10) Add time filter for packet scheduler action dumping, from Jamal Hadi Salim.
11) Extend the zerocopy sendmsg() used by virtio and tap to regular sockets via MSG_ZEROCOPY. From Willem de Bruijn.
12) Significantly rework value tracking in the BPF verifier, from Edward Cree.
13) Add new jump instructions to eBPF, from Daniel Borkmann.
14) Rework rtnetlink plumbing so that operations can be run without taking the RTNL semaphore. From Florian Westphal.
15) Support XDP in tap driver, from Jason Wang.
16) Add 32-bit eBPF JIT for ARM, from Shubham Bansal.
17) Add Huawei hinic ethernet driver.
18) Allow to report MD5 keys in TCP inet_diag dumps, from Ivan Delalande.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1780 commits)
  i40e: point wb_desc at the nvm_wb_desc during i40e_read_nvm_aq
  i40e: avoid NVM acquire deadlock during NVM update
  drivers: net: xgene: Remove return statement from void function
  drivers: net: xgene: Configure tx/rx delay for ACPI
  drivers: net: xgene: Read tx/rx delay for ACPI
  rocker: fix kcalloc parameter order
  rds: Fix non-atomic operation on shared flag variable
  net: sched: don't use GFP_KERNEL under spin lock
  vhost_net: correctly check tx avail during rx busy polling
  net: mdio-mux: add mdio_mux parameter to mdio_mux_init()
  rxrpc: Make service connection lookup always check for retry
  net: stmmac: Delete dead code for MDIO registration
  gianfar: Fix Tx flow control deactivation
  cxgb4: Ignore MPS_TX_INT_CAUSE[Bubble] for T6
  cxgb4: Fix pause frame count in t4_get_port_stats
  cxgb4: fix memory leak
  tun: rename generic_xdp to skb_xdp
  tun: reserve extra headroom only when XDP is set
  net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping
  net: dsa: bcm_sf2: Advertise number of egress queues
  ...
Diffstat (limited to 'net/ipv6')
-rw-r--r-- net/ipv6/Kconfig 16
-rw-r--r-- net/ipv6/Makefile 4
-rw-r--r-- net/ipv6/addrconf.c 60
-rw-r--r-- net/ipv6/addrlabel.c 22
-rw-r--r-- net/ipv6/af_inet6.c 5
-rw-r--r-- net/ipv6/esp6.c 61
-rw-r--r-- net/ipv6/esp6_offload.c 5
-rw-r--r-- net/ipv6/exthdrs.c 4
-rw-r--r-- net/ipv6/fib6_notifier.c 63
-rw-r--r-- net/ipv6/fib6_rules.c 69
-rw-r--r-- net/ipv6/icmp.c 27
-rw-r--r-- net/ipv6/ila/ila_xlat.c 2
-rw-r--r-- net/ipv6/inet6_hashtables.c 28
-rw-r--r-- net/ipv6/ip6_fib.c 134
-rw-r--r-- net/ipv6/ip6_output.c 77
-rw-r--r-- net/ipv6/ip6_vti.c 31
-rw-r--r-- net/ipv6/ip6mr.c 2
-rw-r--r-- net/ipv6/ndisc.c 3
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c 14
-rw-r--r-- net/ipv6/netfilter/ip6t_SYNPROXY.c 2
-rw-r--r-- net/ipv6/netfilter/ip6table_nat.c 2
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 42
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c 25
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c 12
-rw-r--r-- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c 2
-rw-r--r-- net/ipv6/netfilter/nf_log_ipv6.c 2
-rw-r--r-- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c 6
-rw-r--r-- net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 4
-rw-r--r-- net/ipv6/netfilter/nft_fib_ipv6.c 29
-rw-r--r-- net/ipv6/raw.c 13
-rw-r--r-- net/ipv6/reassembly.c 12
-rw-r--r-- net/ipv6/route.c 138
-rw-r--r-- net/ipv6/seg6.c 7
-rw-r--r-- net/ipv6/seg6_hmac.c 7
-rw-r--r-- net/ipv6/seg6_iptunnel.c 82
-rw-r--r-- net/ipv6/seg6_local.c 938
-rw-r--r-- net/ipv6/sysctl_net_ipv6.c 8
-rw-r--r-- net/ipv6/tcp_ipv6.c 23
-rw-r--r-- net/ipv6/udp.c 50
-rw-r--r-- net/ipv6/udp_offload.c 100
-rw-r--r-- net/ipv6/xfrm6_input.c 4
-rw-r--r-- net/ipv6/xfrm6_policy.c 20
42 files changed, 1621 insertions, 534 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 48c452959d2c..ea71e4b0ab7a 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -308,22 +308,12 @@ config IPV6_SEG6_LWTUNNEL
depends on IPV6
select LWTUNNEL
select DST_CACHE
+ select IPV6_MULTIPLE_TABLES
---help---
Support for encapsulation of packets within an outer IPv6
header and a Segment Routing Header using the lightweight
- tunnels mechanism.
-
- If unsure, say N.
-
-config IPV6_SEG6_INLINE
- bool "IPv6: direct Segment Routing Header insertion "
- depends on IPV6_SEG6_LWTUNNEL
- ---help---
- Support for direct insertion of the Segment Routing Header,
- also known as inline mode. Be aware that direct insertion of
- extension headers (as opposed to encapsulation) may break
- multiple mechanisms such as PMTUD or IPSec AH. Use this feature
- only if you know exactly what you are doing.
+ tunnels mechanism. Also enable support for advanced local
+ processing of SRv6 packets based on their active segment.
If unsure, say N.
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 217e9ff0e24b..10e342363793 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
- udp_offload.o seg6.o
+ udp_offload.o seg6.o fib6_notifier.o
ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
@@ -23,7 +23,7 @@ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
ipv6-$(CONFIG_NETLABEL) += calipso.o
-ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o
+ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 936e9ab4dda5..c2e2a78787ec 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3030,9 +3030,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
static void init_loopback(struct net_device *dev)
{
struct inet6_dev *idev;
- struct net_device *sp_dev;
- struct inet6_ifaddr *sp_ifa;
- struct rt6_info *sp_rt;
/* ::1 */
@@ -3045,45 +3042,6 @@ static void init_loopback(struct net_device *dev)
}
add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
-
- /* Add routes to other interface's IPv6 addresses */
- for_each_netdev(dev_net(dev), sp_dev) {
- if (!strcmp(sp_dev->name, dev->name))
- continue;
-
- idev = __in6_dev_get(sp_dev);
- if (!idev)
- continue;
-
- read_lock_bh(&idev->lock);
- list_for_each_entry(sp_ifa, &idev->addr_list, if_list) {
-
- if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
- continue;
-
- if (sp_ifa->rt) {
- /* This dst has been added to garbage list when
- * lo device down, release this obsolete dst and
- * reallocate a new router for ifa.
- */
- if (!atomic_read(&sp_ifa->rt->rt6i_ref)) {
- ip6_rt_put(sp_ifa->rt);
- sp_ifa->rt = NULL;
- } else {
- continue;
- }
- }
-
- sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false);
-
- /* Failure cases are ignored */
- if (!IS_ERR(sp_rt)) {
- sp_ifa->rt = sp_rt;
- ip6_ins_rt(sp_rt);
- }
- }
- read_unlock_bh(&idev->lock);
- }
}
void addrconf_add_linklocal(struct inet6_dev *idev,
@@ -3321,11 +3279,11 @@ static void addrconf_gre_config(struct net_device *dev)
static int fixup_permanent_addr(struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
{
- /* rt6i_ref == 0 means the host route was removed from the
+ /* !rt6i_node means the host route was removed from the
* FIB, for example, if 'lo' device is taken down. In that
* case regenerate the host route.
*/
- if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
+ if (!ifp->rt || !ifp->rt->rt6i_node) {
struct rt6_info *rt, *prev;
rt = addrconf_dst_alloc(idev, &ifp->addr, false);
@@ -6605,21 +6563,21 @@ int __init addrconf_init(void)
rtnl_af_register(&inet6_ops);
err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
- NULL);
+ 0);
if (err < 0)
goto errout;
/* Only the first call to __rtnl_register can fail */
- __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL);
- __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL);
+ __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
+ __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
- inet6_dump_ifaddr, NULL);
+ inet6_dump_ifaddr, 0);
__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
- inet6_dump_ifmcaddr, NULL);
+ inet6_dump_ifmcaddr, 0);
__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
- inet6_dump_ifacaddr, NULL);
+ inet6_dump_ifacaddr, 0);
__rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
- inet6_netconf_dump_devconf, NULL);
+ inet6_netconf_dump_devconf, 0);
ipv6_addr_label_rtnl_register();
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 7a428f65c7ec..b055bc79f56d 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -405,6 +405,18 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
[IFAL_LABEL] = { .len = sizeof(u32), },
};
+static bool addrlbl_ifindex_exists(struct net *net, int ifindex)
+{
+
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, ifindex);
+ rcu_read_unlock();
+
+ return dev != NULL;
+}
+
static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -439,7 +451,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
switch (nlh->nlmsg_type) {
case RTM_NEWADDRLABEL:
if (ifal->ifal_index &&
- !__dev_get_by_index(net, ifal->ifal_index))
+ !addrlbl_ifindex_exists(net, ifal->ifal_index))
return -EINVAL;
err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
@@ -548,7 +560,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return -EINVAL;
if (ifal->ifal_index &&
- !__dev_get_by_index(net, ifal->ifal_index))
+ !addrlbl_ifindex_exists(net, ifal->ifal_index))
return -EINVAL;
if (!tb[IFAL_ADDRESS])
@@ -593,10 +605,10 @@ out:
void __init ipv6_addr_label_rtnl_register(void)
{
__rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
- NULL, NULL);
+ NULL, RTNL_FLAG_DOIT_UNLOCKED);
__rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
- NULL, NULL);
+ NULL, RTNL_FLAG_DOIT_UNLOCKED);
__rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
- ip6addrlbl_dump, NULL);
+ ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a88b5b5b7955..fe5262fd6aa5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,8 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk));
+ np->autoflowlabel = ip6_default_np_autolabel(net);
+ np->repflow = net->ipv6.sysctl.flowlabel_reflect;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets
@@ -554,6 +555,8 @@ const struct proto_ops inet6_stream_ops = {
.recvmsg = inet_recvmsg, /* ok */
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
+ .sendmsg_locked = tcp_sendmsg_locked,
+ .sendpage_locked = tcp_sendpage_locked,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
.peek_len = tcp_peek_len,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ab64f367d11c..89910e2c10f4 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -463,28 +463,30 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
return esp6_output_tail(x, skb, &esp);
}
-int esp6_input_done2(struct sk_buff *skb, int err)
+static inline int esp_remove_trailer(struct sk_buff *skb)
{
struct xfrm_state *x = xfrm_input_state(skb);
struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
- int alen = crypto_aead_authsize(aead);
- int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
- int elen = skb->len - hlen;
- int hdr_len = skb_network_header_len(skb);
- int padlen;
+ int alen, hlen, elen;
+ int padlen, trimlen;
+ __wsum csumdiff;
u8 nexthdr[2];
+ int ret;
- if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
- kfree(ESP_SKB_CB(skb)->tmp);
+ alen = crypto_aead_authsize(aead);
+ hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+ elen = skb->len - hlen;
- if (unlikely(err))
+ if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
+ ret = xo->proto;
goto out;
+ }
if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
BUG();
- err = -EINVAL;
+ ret = -EINVAL;
padlen = nexthdr[0];
if (padlen + 2 + alen >= elen) {
net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
@@ -492,17 +494,46 @@ int esp6_input_done2(struct sk_buff *skb, int err)
goto out;
}
- /* ... check padding bits here. Silly. :-) */
+ trimlen = alen + padlen + 2;
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0);
+ skb->csum = csum_block_sub(skb->csum, csumdiff,
+ skb->len - trimlen);
+ }
+ pskb_trim(skb, skb->len - trimlen);
+
+ ret = nexthdr[1];
+
+out:
+ return ret;
+}
- pskb_trim(skb, skb->len - alen - padlen - 2);
- __skb_pull(skb, hlen);
+int esp6_input_done2(struct sk_buff *skb, int err)
+{
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct crypto_aead *aead = x->data;
+ int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+ int hdr_len = skb_network_header_len(skb);
+
+ if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
+ kfree(ESP_SKB_CB(skb)->tmp);
+
+ if (unlikely(err))
+ goto out;
+
+ err = esp_remove_trailer(skb);
+ if (unlikely(err < 0))
+ goto out;
+
+ skb_postpull_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
+ skb_pull_rcsum(skb, hlen);
if (x->props.mode == XFRM_MODE_TUNNEL)
skb_reset_transport_header(skb);
else
skb_set_transport_header(skb, -hdr_len);
- err = nexthdr[1];
-
/* RFC4303: Drop dummy packets without any error */
if (err == IPPROTO_NONE)
err = -EINVAL;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 1cf437f75b0b..333a478aa161 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -209,11 +209,13 @@ out:
static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
{
struct crypto_aead *aead = x->data;
+ struct xfrm_offload *xo = xfrm_offload(skb);
if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
return -EINVAL;
- skb->ip_summed = CHECKSUM_NONE;
+ if (!(xo->flags & CRYPTO_DONE))
+ skb->ip_summed = CHECKSUM_NONE;
return esp6_input_done2(skb, 0);
}
@@ -332,3 +334,4 @@ module_init(esp6_offload_init);
module_exit(esp6_offload_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
+MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 3cec529c6113..95516138e861 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -882,7 +882,7 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
(hops - 1) * sizeof(struct in6_addr));
sr_phdr->segments[0] = **addr_p;
- *addr_p = &sr_ihdr->segments[hops - 1];
+ *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(sr_phdr)) {
@@ -1174,7 +1174,7 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
{
struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt;
- fl6->daddr = srh->segments[srh->first_segment];
+ fl6->daddr = srh->segments[srh->segments_left];
break;
}
default:
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
new file mode 100644
index 000000000000..05f82baaa99e
--- /dev/null
+++ b/net/ipv6/fib6_notifier.c
@@ -0,0 +1,63 @@
+#include <linux/notifier.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+#include <net/netns/ipv6.h>
+#include <net/ip6_fib.h>
+
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->family = AF_INET6;
+ return call_fib_notifier(nb, net, event_type, info);
+}
+
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->family = AF_INET6;
+ return call_fib_notifiers(net, event_type, info);
+}
+
+static unsigned int fib6_seq_read(struct net *net)
+{
+ return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
+}
+
+static int fib6_dump(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ err = fib6_rules_dump(net, nb);
+ if (err)
+ return err;
+
+ return fib6_tables_dump(net, nb);
+}
+
+static const struct fib_notifier_ops fib6_notifier_ops_template = {
+ .family = AF_INET6,
+ .fib_seq_read = fib6_seq_read,
+ .fib_dump = fib6_dump,
+ .owner = THIS_MODULE,
+};
+
+int __net_init fib6_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv6.notifier_ops = ops;
+
+ return 0;
+}
+
+void __net_exit fib6_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv6.notifier_ops);
+}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec849d88a662..b240f24a6e52 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -14,6 +14,7 @@
*/
#include <linux/netdevice.h>
+#include <linux/notifier.h>
#include <linux/export.h>
#include <net/fib_rules.h>
@@ -29,22 +30,65 @@ struct fib6_rule {
u8 tclass;
};
-struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
- int flags, pol_lookup_t lookup)
+static bool fib6_rule_matchall(const struct fib_rule *rule)
+{
+ struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
+
+ if (r->dst.plen || r->src.plen || r->tclass)
+ return false;
+ return fib_rule_matchall(rule);
+}
+
+bool fib6_rule_default(const struct fib_rule *rule)
{
- struct fib_lookup_arg arg = {
- .lookup_ptr = lookup,
- .flags = FIB_LOOKUP_NOREF,
- };
+ if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+ rule->l3mdev)
+ return false;
+ if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN)
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fib6_rule_default);
- /* update flow if oif or iif point to device enslaved to l3mdev */
- l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, AF_INET6);
+}
- fib_rules_lookup(net->ipv6.fib6_rules_ops,
- flowi6_to_flowi(fl6), flags, &arg);
+unsigned int fib6_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, AF_INET6);
+}
- if (arg.result)
- return arg.result;
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ int flags, pol_lookup_t lookup)
+{
+ if (net->ipv6.fib6_has_custom_rules) {
+ struct fib_lookup_arg arg = {
+ .lookup_ptr = lookup,
+ .flags = FIB_LOOKUP_NOREF,
+ };
+
+ /* update flow if oif or iif point to device enslaved to l3mdev */
+ l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+
+ fib_rules_lookup(net->ipv6.fib6_rules_ops,
+ flowi6_to_flowi(fl6), flags, &arg);
+
+ if (arg.result)
+ return arg.result;
+ } else {
+ struct rt6_info *rt;
+
+ rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+ if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
+ return &rt->dst;
+ ip6_rt_put(rt);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ if (rt->dst.error != -EAGAIN)
+ return &rt->dst;
+ ip6_rt_put(rt);
+ }
dst_hold(&net->ipv6.ip6_null_entry->dst);
return &net->ipv6.ip6_null_entry->dst;
@@ -214,6 +258,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
+ net->ipv6.fib6_has_custom_rules = true;
err = 0;
errout:
return err;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8d7b113958b1..5acb54405b10 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -399,6 +399,24 @@ relookup_failed:
return ERR_PTR(err);
}
+static int icmp6_iif(const struct sk_buff *skb)
+{
+ int iif = skb->dev->ifindex;
+
+ /* for local traffic to local address, skb dev is the loopback
+ * device. Check if there is a dst attached to the skb and if so
+ * get the real device index.
+ */
+ if (unlikely(iif == LOOPBACK_IFINDEX)) {
+ const struct rt6_info *rt6 = skb_rt6_info(skb);
+
+ if (rt6)
+ iif = rt6->rt6i_idev->dev->ifindex;
+ }
+
+ return iif;
+}
+
/*
* Send an ICMP message in response to a packet in error
*/
@@ -459,9 +477,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
* Source addr check
*/
- if (__ipv6_addr_needs_scope_id(addr_type))
- iif = skb->dev->ifindex;
- else {
+ if (__ipv6_addr_needs_scope_id(addr_type)) {
+ iif = icmp6_iif(skb);
+ } else {
dst = skb_dst(skb);
iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
}
@@ -508,6 +526,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
+ fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
@@ -682,7 +701,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
fl6.saddr = *saddr;
- fl6.flowi6_oif = skb->dev->ifindex;
+ fl6.flowi6_oif = icmp6_iif(skb);
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
fl6.flowi6_uid = sock_net_uid(net, NULL);
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 77f7f8c7d93d..5bd419c1abc8 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -208,7 +208,7 @@ ila_nf_input(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = {
+static const struct nf_hook_ops ila_nf_hook_ops[] = {
{
.hook = ila_nf_input,
.pf = NFPROTO_IPV6,
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b13b8f93079d..b01858f5deb1 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -56,7 +56,7 @@ struct sock *__inet6_lookup_established(struct net *net,
const __be16 sport,
const struct in6_addr *daddr,
const u16 hnum,
- const int dif)
+ const int dif, const int sdif)
{
struct sock *sk;
const struct hlist_nulls_node *node;
@@ -73,12 +73,12 @@ begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
+ if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
- if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+ if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established);
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum,
const struct in6_addr *daddr,
- const int dif, bool exact_dif)
+ const int dif, const int sdif, bool exact_dif)
{
int score = -1;
@@ -110,9 +110,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
score++;
}
if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
+ bool dev_match = (sk->sk_bound_dev_if == dif ||
+ sk->sk_bound_dev_if == sdif);
+
+ if (exact_dif && !dev_match)
return -1;
- score++;
+ if (sk->sk_bound_dev_if && dev_match)
+ score++;
}
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
@@ -126,7 +130,7 @@ struct sock *inet6_lookup_listener(struct net *net,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport, const struct in6_addr *daddr,
- const unsigned short hnum, const int dif)
+ const unsigned short hnum, const int dif, const int sdif)
{
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -136,7 +140,7 @@ struct sock *inet6_lookup_listener(struct net *net,
u32 phash = 0;
sk_for_each(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
+ score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
if (score > hiscore) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -171,7 +175,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
bool refcounted;
sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
- ntohs(dport), dif, &refcounted);
+ ntohs(dport), dif, 0, &refcounted);
if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
@@ -187,8 +191,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
const struct in6_addr *saddr = &sk->sk_v6_daddr;
const int dif = sk->sk_bound_dev_if;
- const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
struct net *net = sock_net(sk);
+ const int sdif = l3mdev_master