summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorThomas Graf <tgraf@suug.ch>2015-07-21 10:43:58 +0200
committerDavid S. Miller <davem@davemloft.net>2015-07-21 10:39:06 -0700
commitee122c79d4227f6ec642157834b6a90fcffa4382 (patch)
treec70e570367668c4f9f4314c076e8afffb300117e /include
parent0accfc268f4d3345693d3af3d5780aae3ad93d8d (diff)
vxlan: Flow based tunneling
Allows putting a VXLAN device into a new flow-based mode in which skbs with a ip_tunnel_info dst metadata attached will be encapsulated according to the instructions stored in there with the VXLAN device defaults taken into consideration. Similar on the receive side, if the VXLAN_F_COLLECT_METADATA flag is set, the packet processing will populate a ip_tunnel_info struct for each packet received and attach it to the skb using the new metadata dst. The metadata structure will contain the outer header and tunnel header fields which have been stripped off. Layers further up in the stack such as routing, tc or netfitler can later match on these fields and perform forwarding. It is the responsibility of upper layers to ensure that the flag is set if the metadata is needed. The flag limits the additional cost of metadata collecting based on demand. This prepares the VXLAN device to be steered by the routing and other subsystems which allows to support encapsulation for a large number of tunnel endpoints and tunnel ids through a single net_device which improves the scalability. It also allows for OVS to leverage this mode which in turn allows for the removal of the OVS specific VXLAN code. Because the skb is currently scrubed in vxlan_rcv(), the attachment of the new dst metadata is postponed until after scrubing which requires the temporary addition of a new member to vxlan_metadata. This member is removed again in a later commit after the indirect VXLAN receive API has been removed. Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: Pravin B Shelar <pshelar@nicira.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/skbuff.h1
-rw-r--r--include/net/dst_metadata.h13
-rw-r--r--include/net/ip_tunnels.h14
-rw-r--r--include/net/vxlan.h10
-rw-r--r--include/uapi/linux/if_link.h1
5 files changed, 38 insertions, 1 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6bd96fe9416a..648a2c241993 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3469,5 +3469,6 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
skb_network_header(skb);
return hdr_len + skb_gso_transport_seglen(skb);
}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 4f7694f3c7d0..e843937fb30a 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -8,6 +8,9 @@
struct metadata_dst {
struct dst_entry dst;
size_t opts_len;
+ union {
+ struct ip_tunnel_info tun_info;
+ } u;
};
static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
@@ -20,6 +23,16 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
return NULL;
}
+static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+
+ if (md_dst)
+ return &md_dst->u.tun_info;
+
+ return NULL;
+}
+
static inline bool skb_valid_dst(const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 6b9d559ce5f5..d11530f1c1e2 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -38,10 +38,19 @@ struct ip_tunnel_key {
__be16 tp_dst;
} __packed __aligned(4); /* Minimize padding. */
+/* Indicates whether the tunnel info structure represents receive
+ * or transmit tunnel parameters.
+ */
+enum {
+ IP_TUNNEL_INFO_RX,
+ IP_TUNNEL_INFO_TX,
+};
+
struct ip_tunnel_info {
struct ip_tunnel_key key;
const void *options;
u8 options_len;
+ u8 mode;
};
/* 6rd prefix/relay information */
@@ -284,6 +293,11 @@ static inline void iptunnel_xmit_stats(int err,
}
}
+static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info, size_t n)
+{
+ return info + 1;
+}
+
#endif /* CONFIG_INET */
#endif /* __NET_IP_TUNNELS_H */
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 0082b5d33d7d..80a2da29e088 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -7,6 +7,7 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/udp.h>
+#include <net/dst_metadata.h>
#define VNI_HASH_BITS 10
#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
@@ -97,6 +98,9 @@ struct vxlanhdr {
struct vxlan_metadata {
__be32 vni;
u32 gbp;
+
+ /* Temporary until vxlan_rcv() API is gone */
+ struct metadata_dst *tun_dst;
};
struct vxlan_sock;
@@ -130,6 +134,8 @@ struct vxlan_sock {
#define VXLAN_F_REMCSUM_RX 0x400
#define VXLAN_F_GBP 0x800
#define VXLAN_F_REMCSUM_NOPARTIAL 0x1000
+#define VXLAN_F_COLLECT_METADATA 0x2000
+#define VXLAN_F_FLOW_BASED 0x4000
/* Flags that are used in the receive path. These flags must match in
* order for a socket to be shareable
@@ -137,7 +143,9 @@ struct vxlan_sock {
#define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \
VXLAN_F_UDP_ZERO_CSUM6_RX | \
VXLAN_F_REMCSUM_RX | \
- VXLAN_F_REMCSUM_NOPARTIAL)
+ VXLAN_F_REMCSUM_NOPARTIAL | \
+ VXLAN_F_COLLECT_METADATA | \
+ VXLAN_F_FLOW_BASED)
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
vxlan_rcv_t *rcv, void *data,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 24d68b797c59..9eeb5d9cf8f0 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -382,6 +382,7 @@ enum {
IFLA_VXLAN_REMCSUM_RX,
IFLA_VXLAN_GBP,
IFLA_VXLAN_REMCSUM_NOPARTIAL,
+ IFLA_VXLAN_FLOWBASED,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)