summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig16
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/addrconf.c73
-rw-r--r--net/ipv6/addrlabel.c24
-rw-r--r--net/ipv6/ip6_flowlabel.c47
-rw-r--r--net/ipv6/ip6_gre.c1792
-rw-r--r--net/ipv6/ip6_output.c20
-rw-r--r--net/ipv6/ip6_tunnel.c91
-rw-r--r--net/ipv6/ip6mr.c10
-rw-r--r--net/ipv6/netfilter.c8
-rw-r--r--net/ipv6/netfilter/Kconfig54
-rw-r--r--net/ipv6/netfilter/Makefile8
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c135
-rw-r--r--net/ipv6/netfilter/ip6t_NETMAP.c94
-rw-r--r--net/ipv6/netfilter/ip6t_NPT.c165
-rw-r--r--net/ipv6/netfilter/ip6t_REDIRECT.c98
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c4
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c4
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c321
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c4
-rw-r--r--net/ipv6/netfilter/ip6table_security.c5
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c137
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c19
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c288
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c90
-rw-r--r--net/ipv6/raw.c3
-rw-r--r--net/ipv6/route.c90
-rw-r--r--net/ipv6/syncookies.c1
-rw-r--r--net/ipv6/tcp_ipv6.c20
-rw-r--r--net/ipv6/udp.c3
30 files changed, 3397 insertions, 228 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 5728695b5449..4f7fe7270e37 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -201,6 +201,22 @@ config IPV6_TUNNEL
If unsure, say N.
+config IPV6_GRE
+ tristate "IPv6: GRE tunnel"
+ select IPV6_TUNNEL
+ ---help---
+ Tunneling means encapsulating data of one protocol type within
+ another protocol and sending it over a channel that understands the
+ encapsulating protocol. This particular tunneling driver implements
+ GRE (Generic Routing Encapsulation) and at this time allows
+ encapsulating of IPv4 or IPv6 over existing IPv6 infrastructure.
+ This driver is useful if the other endpoint is a Cisco router: Cisco
+ likes GRE much better than the other Linux tunneling driver ("IP
+ tunneling" above). In addition, GRE allows multicast redistribution
+ through the tunnel.
+
+ Saying M here will produce a module called ip6_gre. If unsure, say N.
+
config IPV6_MULTIPLE_TABLES
bool "IPv6: Multiple Routing Tables"
depends on EXPERIMENTAL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 686934acfac1..b6d3f79151e2 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IPV6_SIT) += sit.o
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
+obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
obj-y += addrconf_core.o exthdrs_core.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6bc85f7c31e3..719a828fb67f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -127,8 +127,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
#endif
#ifdef CONFIG_IPV6_PRIVACY
-static int __ipv6_regen_rndid(struct inet6_dev *idev);
-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
+static void __ipv6_regen_rndid(struct inet6_dev *idev);
+static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
static void ipv6_regen_rndid(unsigned long data);
#endif
@@ -852,16 +852,7 @@ retry:
}
in6_ifa_hold(ifp);
memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
- if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
- spin_unlock_bh(&ifp->lock);
- write_unlock(&idev->lock);
- pr_warn("%s: regeneration of randomized interface id failed\n",
- __func__);
- in6_ifa_put(ifp);
- in6_dev_put(idev);
- ret = -1;
- goto out;
- }
+ __ipv6_try_regen_rndid(idev, tmpaddr);
memcpy(&addr.s6_addr[8], idev->rndid, 8);
age = (now - ifp->tstamp) / HZ;
tmp_valid_lft = min_t(__u32,
@@ -1079,8 +1070,10 @@ static int ipv6_get_saddr_eval(struct net *net,
break;
case IPV6_SADDR_RULE_PREFIX:
/* Rule 8: Use longest matching prefix */
- score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr,
- dst->addr);
+ ret = ipv6_addr_diff(&score->ifa->addr, dst->addr);
+ if (ret > score->ifa->prefix_len)
+ ret = score->ifa->prefix_len;
+ score->matchlen = ret;
break;
default:
ret = 0;
@@ -1093,7 +1086,7 @@ out:
return ret;
}
-int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
+int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
const struct in6_addr *daddr, unsigned int prefs,
struct in6_addr *saddr)
{
@@ -1600,7 +1593,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
#ifdef CONFIG_IPV6_PRIVACY
/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
-static int __ipv6_regen_rndid(struct inet6_dev *idev)
+static void __ipv6_regen_rndid(struct inet6_dev *idev)
{
regen:
get_random_bytes(idev->rndid, sizeof(idev->rndid));
@@ -1627,8 +1620,6 @@ regen:
if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
goto regen;
}
-
- return 0;
}
static void ipv6_regen_rndid(unsigned long data)
@@ -1642,8 +1633,7 @@ static void ipv6_regen_rndid(unsigned long data)
if (idev->dead)
goto out;
- if (__ipv6_regen_rndid(idev) < 0)
- goto out;
+ __ipv6_regen_rndid(idev);
expires = jiffies +
idev->cnf.temp_prefered_lft * HZ -
@@ -1664,13 +1654,10 @@ out:
in6_dev_put(idev);
}
-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
+static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
{
- int ret = 0;
-
if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
- ret = __ipv6_regen_rndid(idev);
- return ret;
+ __ipv6_regen_rndid(idev);
}
#endif
@@ -1721,7 +1708,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
if (table == NULL)
return NULL;
- write_lock_bh(&table->tb6_lock);
+ read_lock_bh(&table->tb6_lock);
fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
if (!fn)
goto out;
@@ -1736,7 +1723,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
break;
}
out:
- write_unlock_bh(&table->tb6_lock);
+ read_unlock_bh(&table->tb6_lock);
return rt;
}
@@ -3549,12 +3536,12 @@ static inline int inet6_ifaddr_msgsize(void)
}
static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
- u32 pid, u32 seq, int event, unsigned int flags)
+ u32 portid, u32 seq, int event, unsigned int flags)
{
struct nlmsghdr *nlh;
u32 preferred, valid;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -3592,7 +3579,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
}
static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
- u32 pid, u32 seq, int event, u16 flags)
+ u32 portid, u32 seq, int event, u16 flags)
{
struct nlmsghdr *nlh;
u8 scope = RT_SCOPE_UNIVERSE;
@@ -3601,7 +3588,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -3617,7 +3604,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
}
static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
- u32 pid, u32 seq, int event, unsigned int flags)
+ u32 portid, u32 seq, int event, unsigned int flags)
{
struct nlmsghdr *nlh;
u8 scope = RT_SCOPE_UNIVERSE;
@@ -3626,7 +3613,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -3667,7 +3654,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
if (++ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifaddr(skb, ifa,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWADDR,
NLM_F_MULTI);
@@ -3683,7 +3670,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
if (ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifmcaddr(skb, ifmca,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_GETMULTICAST,
NLM_F_MULTI);
@@ -3698,7 +3685,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
if (ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifacaddr(skb, ifaca,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_GETANYCAST,
NLM_F_MULTI);
@@ -3820,7 +3807,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout_ifa;
}
- err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
+ err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, RTM_NEWADDR, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
@@ -3828,7 +3815,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
kfree_skb(skb);
goto errout_ifa;
}
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout_ifa:
in6_ifa_put(ifa);
errout:
@@ -4030,14 +4017,14 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
}
static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
- u32 pid, u32 seq, int event, unsigned int flags)
+ u32 portid, u32 seq, int event, unsigned int flags)
{
struct net_device *dev = idev->dev;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
void *protoinfo;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -4095,7 +4082,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
if (!idev)
goto cont;
if (inet6_fill_ifinfo(skb, idev,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWLINK, NLM_F_MULTI) <= 0)
goto out;
@@ -4143,14 +4130,14 @@ static inline size_t inet6_prefix_nlmsg_size(void)
}
static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
- struct prefix_info *pinfo, u32 pid, u32 seq,
+ struct prefix_info *pinfo, u32 portid, u32 seq,
int event, unsigned int flags)
{
struct prefixmsg *pmsg;
struct nlmsghdr *nlh;
struct prefix_cacheinfo ci;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags);
if (nlh == NULL)
return -EMSGSIZE;
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index eb6a63632d3c..4be23da32b89 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -57,7 +57,7 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
}
/*
- * Default policy table (RFC3484 + extensions)
+ * Default policy table (RFC6724 + extensions)
*
* prefix addr_type label
* -------------------------------------------------------------------------
@@ -69,8 +69,12 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
* fc00::/7 N/A 5 ULA (RFC 4193)
* 2001::/32 N/A 6 Teredo (RFC 4380)
* 2001:10::/28 N/A 7 ORCHID (RFC 4843)
+ * fec0::/10 N/A 11 Site-local
+ * (deprecated by RFC3879)
+ * 3ffe::/16 N/A 12 6bone
*
* Note: 0xffffffff is used if we do not have any policies.
+ * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724.
*/
#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
@@ -88,10 +92,18 @@ static const __net_initdata struct ip6addrlbl_init_table
.prefix = &(struct in6_addr){{{ 0xfc }}},
.prefixlen = 7,
.label = 5,
+ },{ /* fec0::/10 */
+ .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}},
+ .prefixlen = 10,
+ .label = 11,
},{ /* 2002::/16 */
.prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
.prefixlen = 16,
.label = 2,
+ },{ /* 3ffe::/16 */
+ .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}},
+ .prefixlen = 16,
+ .label = 12,
},{ /* 2001::/32 */
.prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
.prefixlen = 32,
@@ -470,10 +482,10 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
static int ip6addrlbl_fill(struct sk_buff *skb,
struct ip6addrlbl_entry *p,
u32 lseq,
- u32 pid, u32 seq, int event,
+ u32 portid, u32 seq, int event,
unsigned int flags)
{
- struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
+ struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event,
sizeof(struct ifaddrlblmsg), flags);
if (!nlh)
return -EMSGSIZE;
@@ -503,7 +515,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
net_eq(ip6addrlbl_net(p), net)) {
if ((err = ip6addrlbl_fill(skb, p,
ip6addrlbl_table.seq,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWADDRLABEL,
NLM_F_MULTI)) <= 0)
@@ -574,7 +586,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
}
err = ip6addrlbl_fill(skb, p, lseq,
- NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+ NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
RTM_NEWADDRLABEL, 0);
ip6addrlbl_put(p);
@@ -585,7 +597,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
goto out;
}
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
out:
return err;
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 9772fbd8a3f5..90bbefb57943 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -22,6 +22,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/pid_namespace.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -91,6 +92,8 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
static void fl_free(struct ip6_flowlabel *fl)
{
if (fl) {
+ if (fl->share == IPV6_FL_S_PROCESS)
+ put_pid(fl->owner.pid);
release_net(fl->fl_net);
kfree(fl->opt);
}
@@ -394,10 +397,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
case IPV6_FL_S_ANY:
break;
case IPV6_FL_S_PROCESS:
- fl->owner = current->pid;
+ fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
break;
case IPV6_FL_S_USER:
- fl->owner = current_euid();
+ fl->owner.uid = current_euid();
break;
default:
err = -EINVAL;
@@ -561,7 +564,10 @@ recheck:
err = -EPERM;
if (fl1->share == IPV6_FL_S_EXCL ||
fl1->share != fl->share ||
- fl1->owner != fl->owner)
+ ((fl1->share == IPV6_FL_S_PROCESS) &&
+ (fl1->owner.pid != fl->owner.pid)) || /* refuse: label belongs to a different process */
+ ((fl1->share == IPV6_FL_S_USER) &&
+ !uid_eq(fl1->owner.uid, fl->owner.uid))) /* refuse: label belongs to a different user */
goto release;
err = -EINVAL;
@@ -621,6 +627,7 @@ done:
struct ip6fl_iter_state {
struct seq_net_private p;
+ struct pid_namespace *pid_ns;
int bucket;
};
@@ -699,6 +706,7 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v)
static int ip6fl_seq_show(struct seq_file *seq, void *v)
{
+ struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
if (v == SEQ_START_TOKEN)
seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
"Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
@@ -708,7 +716,11 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v)
"%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
(unsigned int)ntohl(fl->label),
fl->share,
- (int)fl->owner,
+ ((fl->share == IPV6_FL_S_PROCESS) ?
+ pid_nr_ns(fl->owner.pid, state->pid_ns) :
+ ((fl->share == IPV6_FL_S_USER) ?
+ from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
+ 0)),
atomic_read(&fl->users),
fl->linger/HZ,
(long)(fl->expires - jiffies)/HZ,
@@ -727,8 +739,29 @@ static const struct seq_operations ip6fl_seq_ops = {
static int ip6fl_seq_open(struct inode *inode, struct file *file)
{
- return seq_open_net(inode, file, &ip6fl_seq_ops,
- sizeof(struct ip6fl_iter_state));
+ struct seq_file *seq;
+ struct ip6fl_iter_state *state;
+ int err;
+
+ err = seq_open_net(inode, file, &ip6fl_seq_ops,
+ sizeof(struct ip6fl_iter_state));
+
+ if (!err) {
+ seq = file->private_data;
+ state = ip6fl_seq_private(seq);
+ rcu_read_lock();
+ state->pid_ns = get_pid_ns(task_active_pid_ns(current));
+ rcu_read_unlock();
+ }
+ return err;
+}
+
+static int ip6fl_seq_release(struct inode *inode, struct file *file) /* .release hook paired with ip6fl_seq_open() */
+{
+ struct seq_file *seq = file->private_data;
+ struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+ put_pid_ns(state->pid_ns); /* drop the reference taken with get_pid_ns() at open time */
+ return seq_release_net(inode, file); /* then do the release the fops used to call directly */
}
static const struct file_operations ip6fl_seq_fops = {
@@ -736,7 +769,7 @@ static const struct file_operations ip6fl_seq_fops = {
.open = ip6fl_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_net,
+ .release = ip6fl_seq_release,
};
static int __net_init ip6_flowlabel_proc_init(struct net *net)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
new file mode 100644
index 000000000000..424d11a4e7ff
--- /dev/null
+++ b/net/ipv6/ip6_gre.c
@@ -0,0 +1,1792 @@
+/*
+ * GRE over IPv6 protocol decoder.
+ *
+ * Authors: Dmitry Kozlov (xeb@mail.ru)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/hash.h>
+#include <linux/if_tunnel.h>
+#include <linux/ip6_tunnel.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/addrconf.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/rtnetlink.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/ip6_tunnel.h>
+
+
+#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
+#define IPV6_TCLASS_SHIFT 20
+
+#define HASH_SIZE_SHIFT 5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+
+static int ip6gre_net_id __read_mostly;
+struct ip6gre_net { /* per-namespace state, looked up via net_generic(net, ip6gre_net_id) */
+ struct ip6_tnl __rcu *tunnels[4][HASH_SIZE]; /* four hash tables, one per (remote,local) wildcard combination */
+
+ struct net_device *fb_tunnel_dev; /* fallback device returned by lookup when no tunnel matches */
+};
+
+static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
+static int ip6gre_tunnel_init(struct net_device *dev);
+static void ip6gre_tunnel_setup(struct net_device *dev);
+static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
+
+/* Tunnel hash table */
+
+/*
+ 4 hash tables:
+
+ 3: (remote,local)
+ 2: (remote,*)
+ 1: (*,local)
+ 0: (*,*)
+
+ We require exact key match i.e. if a key is present in packet
+ it will match only tunnel with the same key; if it is not present,
+ it will match only keyless tunnel.
+
+ All keyless packets, if not matched by configured keyless tunnels,
+ will match the fallback tunnel.
+ */
+
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+static u32 HASH_ADDR(const struct in6_addr *addr) /* bucket hash for an IPv6 address */
+{
+ u32 hash = ipv6_addr_hash(addr);
+
+ return hash_32(hash, HASH_SIZE_SHIFT); /* NOTE(review): presumably folds to HASH_SIZE_SHIFT bits so it indexes HASH_SIZE buckets - confirm */
+}
+
+#define tunnels_r_l tunnels[3]
+#define tunnels_r tunnels[2]
+#define tunnels_l tunnels[1]
+#define tunnels_wc tunnels[0]
+/*
+ * Locking : hash tables are protected by RCU and RTNL
+ */
+
+#define for_each_ip_tunnel_rcu(start) \
+ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
+/* often modified stats are per cpu, others are shared (netdev->stats) */
+struct pcpu_tstats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+ struct u64_stats_sync syncp; /* readers bracket counter reads with u64_stats_fetch_begin_bh/retry_bh on this */
+};
+
+static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *tot)
+{ /* Adds every CPU's rx/tx counters into @tot, copies the shared counters from dev->stats, returns @tot. */
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ do { /* snapshot into locals; repeat while u64_stats_fetch_retry_bh() asks for a retry */
+ start = u64_stats_fetch_begin_bh(&tstats->syncp);
+ rx_packets = tstats->rx_packets;
+ tx_packets = tstats->tx_packets;
+ rx_bytes = tstats->rx_bytes;
+ tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+ tot->rx_packets += rx_packets; /* accumulated with +=: assumes the caller passed @tot zeroed - TODO confirm */
+ tot->tx_packets += tx_packets;
+ tot->rx_bytes += rx_bytes;
+ tot->tx_bytes += tx_bytes;
+ }
+
+ tot->multicast = dev->stats.multicast; /* the remaining counters are not per-cpu: plain copies */
+ tot->rx_crc_errors = dev->stats.rx_crc_errors;
+ tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+ tot->rx_length_errors = dev->stats.rx_length_errors;
+ tot->rx_errors = dev->stats.rx_errors;
+ tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+ tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+ tot->tx_dropped = dev->stats.tx_dropped;
+ tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+ tot->tx_errors = dev->stats.tx_errors;
+
+ return tot;
+}
+
+/* Given src, dst and key, find the appropriate tunnel for input. Searches the four tables from most to least specific, then the fallback device; returns NULL if nothing usable is up. */
+
+static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
+ const struct in6_addr *remote, const struct in6_addr *local,
+ __be32 key, __be16 gre_proto)
+{
+ struct net *net = dev_net(dev);
+ int link = dev->ifindex;
+ unsigned int h0 = HASH_ADDR(remote);
+ unsigned int h1 = HASH_KEY(key);
+ struct ip6_tnl *t, *cand = NULL;
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+ ARPHRD_ETHER : ARPHRD_IP6GRE;
+ int score, cand_score = 4; /* 4 is above the worst real score (1|2 == 3) */
+
+ for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { /* table 3: (remote,local) */
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_equal(remote, &t->parms.raddr) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type) /* accept only ARPHRD_IP6GRE devices or the exact type wanted */
+ continue;
+
+ score = 0; /* bit 0: link mismatch, bit 1: device type mismatch */
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0) /* perfect match: stop searching */
+ return t;
+
+ if (score < cand_score) { /* remember the best imperfect match so far */
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { /* table 2: (remote,*) */
+ if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { /* table 1: (*,local), hashed by key only */
+ if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
+ (!ipv6_addr_equal(local, &t->parms.raddr) ||
+ !ipv6_addr_is_multicast(local))) || /* a multicast destination may instead equal the tunnel's raddr */
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { /* table 0: (*,*), only the key has to match */
+ if (t->parms.i_key != key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ if (cand != NULL) /* no perfect match: use the lowest-scoring candidate */
+ return cand;
+
+ dev = ign->fb_tunnel_dev; /* last resort: the namespace's fallback tunnel, if it is up */
+ if (dev->flags & IFF_UP)
+ return netdev_priv(dev);
+
+ return NULL;
+}
+
+static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
+ const struct __ip6_tnl_parm *p)
+{ /* Returns the address of the bucket head that tunnel parameters @p hash to. */
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ unsigned int h = HASH_KEY(p->i_key);
+ int prio = 0; /* table index: bit 0 = local address set, bit 1 = unicast remote set */
+
+ if (!ipv6_addr_any(local))
+ prio |= 1;
+ if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) { /* a multicast remote is filed like a wildcard remote */
+ prio |= 2;
+ h ^= HASH_ADDR(remote); /* the remote address contributes to the hash only in tables 2 and 3 */
+ }
+
+ return &ign->tunnels[prio][h];
+}
+
+static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
+ const struct ip6_tnl *t)
+{ /* Same as __ip6gre_bucket(), taking the tunnel itself instead of its parameters. */
+ return __ip6gre_bucket(ign, &t->parms);
+}
+
+static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
+{ /* Inserts @t at the head of its hash bucket. Uses rtnl_dereference(), so presumably called with RTNL held - confirm. */
+ struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
+
+ rcu_assign_pointer(t->next, rtnl_dereference(*tp)); /* chain the old head behind t first... */
+ rcu_assign_pointer(*tp, t); /* ...then make t the new bucket head */
+}
+
+static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
+{ /* Removes @t from its hash bucket; does nothing if @t is not found there. */
+ struct ip6_tnl __rcu **tp;
+ struct ip6_tnl *iter;
+
+ for (tp = ip6gre_bucket(ign, t); /* tp always points at the link that leads to iter */
+ (iter = rtnl_dereference(*tp)) != NULL;
+ tp = &iter->next) {
+ if (t == iter) {
+ rcu_assign_pointer(*tp, t->next); /* bypass t; t->next itself is left untouched */
+ break;
+ }
+ }
+}
+
+static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
+ const struct __ip6_tnl_parm *parms,
+ int type)
+{
+ const struct in6_addr *remote = &parms->raddr;
+ const struct in6_addr *local = &parms->laddr;
+ __be32 key = parms->i_key;
+ int link = parms->link;
+ struct ip6_tnl *t;
+ struct ip6_tnl __rcu **tp;
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ for (tp = __ip6gre_bucket(ign, parms);
+ (t = rtnl_dereference(*tp)) != NULL;
+ tp = &t->next)
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ key == t->parms.i_key &&
+ link == t->parms.link &&
+ type == t->dev->type)
+ break;
+