summaryrefslogtreecommitdiffstats
path: root/net/openvswitch
diff options
context:
space:
mode:
authorAndy Zhou <azhou@nicira.com>2013-08-07 20:01:00 -0700
committerJesse Gross <jesse@nicira.com>2013-08-23 16:43:07 -0700
commit03f0d916aa0317592dda11bd17c7357858719b6c (patch)
tree436f94d9c4846cadfa73ee0822f44a6383f3a2f3 /net/openvswitch
parent3fa34de67861abfc4846ccec886ca549d46ae56c (diff)
openvswitch: Mega flow implementation
Add wildcarded flow support in kernel datapath. Wildcarded flow can improve OVS flow set up performance by avoid sending matching new flows to the user space program. The exact performance boost will largely dependent on wildcarded flow hit rate. In case all new flows hits wildcard flows, the flow set up rate is within 5% of that of linux bridge module. Pravin has made significant contributions to this patch. Including API clean ups and bug fixes. Signed-off-by: Pravin B Shelar <pshelar@nicira.com> Signed-off-by: Andy Zhou <azhou@nicira.com> Signed-off-by: Jesse Gross <jesse@nicira.com>
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/actions.c6
-rw-r--r--net/openvswitch/datapath.c140
-rw-r--r--net/openvswitch/datapath.h6
-rw-r--r--net/openvswitch/flow.c1387
-rw-r--r--net/openvswitch/flow.h96
5 files changed, 1123 insertions, 512 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ab101f715447..1f680222f4f7 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -376,8 +376,10 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *a;
int rem;
+ BUG_ON(!OVS_CB(skb)->pkt_key);
+
upcall.cmd = OVS_PACKET_CMD_ACTION;
- upcall.key = &OVS_CB(skb)->flow->key;
+ upcall.key = OVS_CB(skb)->pkt_key;
upcall.userdata = NULL;
upcall.portid = 0;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9d97ef3c9830..d29cd9aa4a67 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -165,7 +165,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
{
struct datapath *dp = container_of(rcu, struct datapath, rcu);
- ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
+ ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false);
free_percpu(dp->stats_percpu);
release_net(ovs_dp_get_net(dp));
kfree(dp->ports);
@@ -226,19 +226,18 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
struct sw_flow_key key;
u64 *stats_counter;
int error;
- int key_len;
stats = this_cpu_ptr(dp->stats_percpu);
/* Extract flow from 'skb' into 'key'. */
- error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
+ error = ovs_flow_extract(skb, p->port_no, &key);
if (unlikely(error)) {
kfree_skb(skb);
return;
}
/* Look up flow. */
- flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
+ flow = ovs_flow_lookup(rcu_dereference(dp->table), &key);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
@@ -253,6 +252,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
}
OVS_CB(skb)->flow = flow;
+ OVS_CB(skb)->pkt_key = &key;
stats_counter = &stats->n_hit;
ovs_flow_used(OVS_CB(skb)->flow, skb);
@@ -435,7 +435,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
upcall->dp_ifindex = dp_ifindex;
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
- ovs_flow_to_nlattrs(upcall_info->key, user_skb);
+ ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb);
nla_nest_end(user_skb, nla);
if (upcall_info->userdata)
@@ -468,7 +468,7 @@ static int flush_flows(struct datapath *dp)
rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_deferred_destroy(old_table);
+ ovs_flow_tbl_destroy(old_table, true);
return 0;
}
@@ -611,10 +611,12 @@ static int validate_tp_port(const struct sw_flow_key *flow_key)
static int validate_and_copy_set_tun(const struct nlattr *attr,
struct sw_flow_actions **sfa)
{
- struct ovs_key_ipv4_tunnel tun_key;
+ struct sw_flow_match match;
+ struct sw_flow_key key;
int err, start;
- err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key);
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false);
if (err)
return err;
@@ -622,7 +624,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
if (start < 0)
return start;
- err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key));
+ err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
+ sizeof(match.key->tun_key));
add_nested_action_end(*sfa, start);
return err;
@@ -857,7 +860,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct ethhdr *eth;
int len;
int err;
- int key_len;
err = -EINVAL;
if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
@@ -890,11 +892,11 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(flow))
goto err_kfree_skb;
- err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
+ err = ovs_flow_extract(packet, -1, &flow->key);
if (err)
goto err_flow_free;
- err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]);
+ err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]);
if (err)
goto err_flow_free;
acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
@@ -908,6 +910,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
goto err_flow_free;
OVS_CB(packet)->flow = flow;
+ OVS_CB(packet)->pkt_key = &flow->key;
packet->priority = flow->key.phy.priority;
packet->mark = flow->key.phy.skb_mark;
@@ -922,13 +925,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
local_bh_enable();
rcu_read_unlock();
- ovs_flow_free(flow);
+ ovs_flow_free(flow, false);
return err;
err_unlock:
rcu_read_unlock();
err_flow_free:
- ovs_flow_free(flow);
+ ovs_flow_free(flow, false);
err_kfree_skb:
kfree_skb(packet);
err:
@@ -1045,7 +1048,8 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
if (!start)
return -EMSGSIZE;
- err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key));
+ err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
+ nla_data(ovs_key));
if (err)
return err;
nla_nest_end(skb, start);
@@ -1093,6 +1097,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
{
return NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
+ + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@@ -1119,12 +1124,25 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
ovs_header->dp_ifindex = get_dpifindex(dp);
+ /* Fill flow key. */
nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
if (!nla)
goto nla_put_failure;
- err = ovs_flow_to_nlattrs(&flow->key, skb);
+
+ err = ovs_flow_to_nlattrs(&flow->unmasked_key,
+ &flow->unmasked_key, skb);
+ if (err)
+ goto error;
+ nla_nest_end(skb, nla);
+
+ nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
+ if (!nla)
+ goto nla_put_failure;
+
+ err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb);
if (err)
goto error;
+
nla_nest_end(skb, nla);
spin_lock_bh(&flow->lock);
@@ -1214,20 +1232,24 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
- struct sw_flow_key key;
- struct sw_flow *flow;
+ struct sw_flow_key key, masked_key;
+ struct sw_flow *flow = NULL;
+ struct sw_flow_mask mask;
struct sk_buff *reply;
struct datapath *dp;
struct flow_table *table;
struct sw_flow_actions *acts = NULL;
+ struct sw_flow_match match;
int error;
- int key_len;
/* Extract key. */
error = -EINVAL;
if (!a[OVS_FLOW_ATTR_KEY])
goto error;
- error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+
+ ovs_match_init(&match, &key, &mask);
+ error = ovs_match_from_nlattrs(&match,
+ a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
if (error)
goto error;
@@ -1238,9 +1260,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(acts))
goto error;
- error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts);
- if (error)
+ ovs_flow_key_mask(&masked_key, &key, &mask);
+ error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
+ &masked_key, 0, &acts);
+ if (error) {
+ OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
goto err_kfree;
+ }
} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
error = -EINVAL;
goto error;
@@ -1253,8 +1279,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_ovs;
table = ovsl_dereference(dp->table);
- flow = ovs_flow_tbl_lookup(table, &key, key_len);
+
+ /* Check if this is a duplicate flow */
+ flow = ovs_flow_lookup(table, &key);
if (!flow) {
+ struct sw_flow_mask *mask_p;
/* Bail out if we're not allowed to create a new flow. */
error = -ENOENT;
if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
@@ -1267,7 +1296,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
new_table = ovs_flow_tbl_expand(table);
if (!IS_ERR(new_table)) {
rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_deferred_destroy(table);
+ ovs_flow_tbl_destroy(table, true);
table = ovsl_dereference(dp->table);
}
}
@@ -1280,14 +1309,30 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
}
clear_stats(flow);
+ flow->key = masked_key;
+ flow->unmasked_key = key;
+
+ /* Make sure mask is unique in the system */
+ mask_p = ovs_sw_flow_mask_find(table, &mask);
+ if (!mask_p) {
+ /* Allocate a new mask if none exsits. */
+ mask_p = ovs_sw_flow_mask_alloc();
+ if (!mask_p)
+ goto err_flow_free;
+ mask_p->key = mask.key;
+ mask_p->range = mask.range;
+ ovs_sw_flow_mask_insert(table, mask_p);
+ }
+
+ ovs_sw_flow_mask_add_ref(mask_p);
+ flow->mask = mask_p;
rcu_assign_pointer(flow->sf_acts, acts);
/* Put flow in bucket. */
- ovs_flow_tbl_insert(table, flow, &key, key_len);
+ ovs_flow_insert(table, flow);
reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
- info->snd_seq,
- OVS_FLOW_CMD_NEW);
+ info->snd_seq, OVS_FLOW_CMD_NEW);
} else {
/* We found a matching flow. */
struct sw_flow_actions *old_acts;
@@ -1303,6 +1348,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
goto err_unlock_ovs;
+ /* The unmasked key has to be the same for flow updates. */
+ error = -EINVAL;
+ if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) {
+ OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
+ goto err_unlock_ovs;
+ }
+
/* Update actions. */
old_acts = ovsl_dereference(flow->sf_acts);
rcu_assign_pointer(flow->sf_acts, acts);
@@ -1327,6 +1379,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
return 0;
+err_flow_free:
+ ovs_flow_free(flow, false);
err_unlock_ovs:
ovs_unlock();
err_kfree:
@@ -1344,12 +1398,16 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct sw_flow *flow;
struct datapath *dp;
struct flow_table *table;
+ struct sw_flow_match match;
int err;
- int key_len;
- if (!a[OVS_FLOW_ATTR_KEY])
+ if (!a[OVS_FLOW_ATTR_KEY]) {
+ OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
return -EINVAL;
- err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+ }
+
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
return err;
@@ -1361,7 +1419,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
}
table = ovsl_dereference(dp->table);
- flow = ovs_flow_tbl_lookup(table, &key, key_len);
+ flow = ovs_flow_lookup_unmasked_key(table, &match);
if (!flow) {
err = -ENOENT;
goto unlock;
@@ -1390,8 +1448,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct sw_flow *flow;
struct datapath *dp;
struct flow_table *table;
+ struct sw_flow_match match;
int err;
- int key_len;
ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1404,12 +1462,14 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
err = flush_flows(dp);
goto unlock;
}
- err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
goto unlock;
table = ovsl_dereference(dp->table);
- flow = ovs_flow_tbl_lookup(table, &key, key_len);
+ flow = ovs_flow_lookup_unmasked_key(table, &match);
if (!flow) {
err = -ENOENT;
goto unlock;
@@ -1421,13 +1481,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- ovs_flow_tbl_remove(table, flow);
+ ovs_flow_remove(table, flow);
err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_FLOW_CMD_DEL);
BUG_ON(err < 0);
- ovs_flow_deferred_free(flow);
+ ovs_flow_free(flow, true);
ovs_unlock();
ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
@@ -1457,7 +1517,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
bucket = cb->args[0];
obj = cb->args[1];
- flow = ovs_flow_tbl_next(table, &bucket, &obj);
+ flow = ovs_flow_dump_next(table, &bucket, &obj);
if (!flow)
break;
@@ -1680,7 +1740,7 @@ err_destroy_ports_array:
err_destroy_percpu:
free_percpu(dp->stats_percpu);
err_destroy_table:
- ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
+ ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false);
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
@@ -2287,7 +2347,7 @@ static void rehash_flow_table(struct work_struct *work)
new_table = ovs_flow_tbl_rehash(old_table);
if (!IS_ERR(new_table)) {
rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_deferred_destroy(old_table);
+ ovs_flow_tbl_destroy(old_table, true);
}
}
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index a91486484916..4d109c176ef3 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -88,11 +88,13 @@ struct datapath {
/**
* struct ovs_skb_cb - OVS data in skb CB
* @flow: The flow associated with this packet. May be %NULL if no flow.
+ * @pkt_key: The flow information extracted from the packet. Must be nonnull.
* @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
* packet is not being tunneled.
*/
struct ovs_skb_cb {
struct sw_flow *flow;
+ struct sw_flow_key *pkt_key;
struct ovs_key_ipv4_tunnel *tun_key;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -183,4 +185,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
void ovs_dp_notify_wq(struct work_struct *work);
+
+#define OVS_NLERR(fmt, ...) \
+ pr_info_once("netlink: " fmt, ##__VA_ARGS__)
+
#endif /* datapath.h */
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index fca282520cee..1fceb9653598 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2011 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -46,6 +46,184 @@
static struct kmem_cache *flow_cache;
+static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
+ struct sw_flow_key_range *range, u8 val);
+
+static void update_range__(struct sw_flow_match *match,
+ size_t offset, size_t size, bool is_mask)
+{
+ struct sw_flow_key_range *range = NULL;
+ size_t start = offset;
+ size_t end = offset + size;
+
+ if (!is_mask)
+ range = &match->range;
+ else if (match->mask)
+ range = &match->mask->range;
+
+ if (!range)
+ return;
+
+ if (range->start == range->end) {
+ range->start = start;
+ range->end = end;
+ return;
+ }
+
+ if (range->start > start)
+ range->start = start;
+
+ if (range->end < end)
+ range->end = end;
+}
+
+#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
+ do { \
+ update_range__(match, offsetof(struct sw_flow_key, field), \
+ sizeof((match)->key->field), is_mask); \
+ if (is_mask) { \
+ if ((match)->mask) \
+ (match)->mask->key.field = value; \
+ } else { \
+ (match)->key->field = value; \
+ } \
+ } while (0)
+
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+ do { \
+ update_range__(match, offsetof(struct sw_flow_key, field), \
+ len, is_mask); \
+ if (is_mask) { \
+ if ((match)->mask) \
+ memcpy(&(match)->mask->key.field, value_p, len);\
+ } else { \
+ memcpy(&(match)->key->field, value_p, len); \
+ } \
+ } while (0)
+
+void ovs_match_init(struct sw_flow_match *match,
+ struct sw_flow_key *key,
+ struct sw_flow_mask *mask)
+{
+ memset(match, 0, sizeof(*match));
+ match->key = key;
+ match->mask = mask;
+
+ memset(key, 0, sizeof(*key));
+
+ if (mask) {
+ memset(&mask->key, 0, sizeof(mask->key));
+ mask->range.start = mask->range.end = 0;
+ }
+}
+
+static bool ovs_match_validate(const struct sw_flow_match *match,
+ u64 key_attrs, u64 mask_attrs)
+{
+ u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
+ u64 mask_allowed = key_attrs; /* At most allow all key attributes */
+
+ /* The following mask attributes allowed only if they
+ * pass the validation tests. */
+ mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
+ | (1 << OVS_KEY_ATTR_IPV6)
+ | (1 << OVS_KEY_ATTR_TCP)
+ | (1 << OVS_KEY_ATTR_UDP)
+ | (1 << OVS_KEY_ATTR_ICMP)
+ | (1 << OVS_KEY_ATTR_ICMPV6)
+ | (1 << OVS_KEY_ATTR_ARP)
+ | (1 << OVS_KEY_ATTR_ND));
+
+ /* Always allowed mask fields. */
+ mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
+ | (1 << OVS_KEY_ATTR_IN_PORT)
+ | (1 << OVS_KEY_ATTR_ETHERTYPE));
+
+ /* Check key attributes. */
+ if (match->key->eth.type == htons(ETH_P_ARP)
+ || match->key->eth.type == htons(ETH_P_RARP)) {
+ key_expected |= 1 << OVS_KEY_ATTR_ARP;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
+ }
+
+ if (match->key->eth.type == htons(ETH_P_IP)) {
+ key_expected |= 1 << OVS_KEY_ATTR_IPV4;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
+
+ if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+ if (match->key->ip.proto == IPPROTO_UDP) {
+ key_expected |= 1 << OVS_KEY_ATTR_UDP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_TCP) {
+ key_expected |= 1 << OVS_KEY_ATTR_TCP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_ICMP) {
+ key_expected |= 1 << OVS_KEY_ATTR_ICMP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
+ }
+ }
+ }
+
+ if (match->key->eth.type == htons(ETH_P_IPV6)) {
+ key_expected |= 1 << OVS_KEY_ATTR_IPV6;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
+
+ if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+ if (match->key->ip.proto == IPPROTO_UDP) {
+ key_expected |= 1 << OVS_KEY_ATTR_UDP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_TCP) {
+ key_expected |= 1 << OVS_KEY_ATTR_TCP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_ICMPV6) {
+ key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
+
+ if (match->key->ipv6.tp.src ==
+ htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+ match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+ key_expected |= 1 << OVS_KEY_ATTR_ND;
+ if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ND;
+ }
+ }
+ }
+ }
+
+ if ((key_attrs & key_expected) != key_expected) {
+ /* Key attributes check failed. */
+ OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
+ key_attrs, key_expected);
+ return false;
+ }
+
+ if ((mask_attrs & mask_allowed) != mask_attrs) {
+ /* Mask attributes check failed. */
+ OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
+ mask_attrs, mask_allowed);
+ return false;
+ }
+
+ return true;
+}
+
static int check_header(struct sk_buff *skb, int len)
{
if (unlikely(skb->len < len))
@@ -121,12 +299,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
return cur_ms - idle_ms;
}
-#define SW_FLOW_KEY_OFFSET(field) \
- (offsetof(struct sw_flow_key, field) + \
- FIELD_SIZEOF(struct sw_flow_key, field))
-
-static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
- int *key_lenp)
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
{
unsigned int nh_ofs = skb_network_offset(skb);
unsigned int nh_len;
@@ -136,8 +309,6 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
__be16 frag_off;
int err;
- *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label);
-
err = check_header(skb, nh_ofs + sizeof(*nh));
if (unlikely(err))
return err;
@@ -176,6 +347,21 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
sizeof(struct icmp6hdr));
}
+void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
+ const struct sw_flow_mask *mask)
+{
+ u8 *m = (u8 *)&mask->key + mask->range.start;
+ u8 *s = (u8 *)src + mask->range.start;
+ u8 *d = (u8 *)dst + mask->range.start;
+ int i;
+
+ memset(dst, 0, sizeof(*dst));
+ for (i = 0; i < ovs_sw_flow_mask_size_roundup(mask); i++) {
+ *d = *s & *m;
+ d++, s++, m++;
+ }
+}
+
#define TCP_FLAGS_OFFSET 13
#define TCP_FLAG_MASK 0x3f
@@ -224,6 +410,7 @@ struct sw_flow *ovs_flow_alloc(void)
spin_lock_init(&flow->lock);
flow->sf_acts = NULL;
+ flow->mask = NULL;
return flow;
}
@@ -263,7 +450,7 @@ static void free_buckets(struct flex_array *buckets)
flex_array_free(buckets);
}
-struct flow_table *ovs_flow_tbl_alloc(int new_size)
+static struct flow_table *__flow_tbl_alloc(int new_size)
{
struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
@@ -281,17 +468,15 @@ struct flow_table *ovs_flow_tbl_alloc(int new_size)
table->node_ver = 0;
table->keep_flows = false;
get_random_bytes(&table->hash_seed, sizeof(u32));
+ table->mask_list = NULL;
return table;
}
-void ovs_flow_tbl_destroy(struct flow_table *table)
+static void __flow_tbl_destroy(struct flow_table *table)
{
int i;
- if (!table)
- return;
-
if (table->keep_flows)
goto skip_flows;
@@ -303,31 +488,55 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
hlist_del(&flow->hash_node[ver]);
- ovs_flow_free(flow);
+ ovs_flow_free(flow, false);
}
}
+ BUG_ON(!list_empty(table->mask_list));
+ kfree(table->mask_list);
+
skip_flows:
free_buckets(table->buckets);
kfree(table);
}
+struct flow_table *ovs_flow_tbl_alloc(int new_size)
+{
+ struct flow_table *table = __flow_tbl_alloc(new_size);
+
+ if (!table)
+ return NULL;
+
+ table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL);
+ if (!table->mask_list) {
+ table->keep_flows = true;
+ __flow_tbl_destroy(table);
+ return NULL;
+ }
+ INIT_LIST_HEAD(table->mask_list);
+
+ return table;
+}
+
static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
{
struct flow_table *table = container_of(rcu, struct flow_table, rcu);
- ovs_flow_tbl_destroy(table);
+ __flow_tbl_destroy(table);
}
-void ovs_flow_tbl_deferred_destroy(struct flow_table *table)
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
{
if (!table)
return;
- call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
+ if (deferred)
+ call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
+ else
+ __flow_tbl_destroy(table);
}
-struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last)
+struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last)
{
struct sw_flow *flow;
struct hlist_head *head;
@@ -353,11 +562,13 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la
return NULL;
}
-static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
+static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
{
struct hlist_head *head;
+
head = find_bucket(table, flow->hash);
hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
+
table->count++;
}
@@ -377,8 +588,10 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new
head = flex_array_get(old->buckets, i);
hlist_for_each_entry(flow, head, hash_node[old_ver])
- __flow_tbl_insert(new, flow);
+ __tbl_insert(new, flow);
}
+
+ new->mask_list = old->mask_list;
old->keep_flows = true;
}
@@ -386,7 +599,7 @@ static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buck
{
struct flow_table *new_table;
- new_table = ovs_flow_tbl_alloc(n_buckets);
+ new_table = __flow_tbl_alloc(n_buckets);
if (!new_table)
return ERR_PTR(-ENOMEM);
@@ -405,28 +618,30 @@ struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
return __flow_tbl_rehash(table, table->n_buckets * 2);
}
-void ovs_flow_free(struct sw_flow *flow)
+static void __flow_free(struct sw_flow *flow)
{
- if (unlikely(!flow))
- return;
-
kfree((struct sf_flow_acts __force *)flow->sf_acts);
kmem_cache_free(flow_cache, flow);
}
-/* RCU callback used by ovs_flow_deferred_free. */
static void rcu_free_flow_callback(struct rcu_head *rcu)
{
struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
- ovs_flow_free(flow);
+ __flow_free(flow);
}
-/* Schedules 'flow' to be freed after the next RCU grace period.
- * The caller must hold rcu_read_lock for this to be sensible. */
-void ovs_flow_deferred_free(struct sw_flow *flow)
+void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
- call_rcu(&flow->rcu, rcu_free_flow_callback);
+ if (!flow)
+ return;
+
+ ovs_sw_flow_mask_del_ref(flow->mask, deferred);
+
+ if (deferred)
+ call_rcu(&flow->rcu, rcu_free_flow_callback);
+ else
+ __flow_free(flow);
}
/* Schedules 'sf_acts' to be freed after the next RCU grace period.
@@ -497,18 +712,15 @@ static __be16 parse_ethertype(struct sk_buff *skb)
}
static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
- int *key_lenp, int nh_len)
+ int nh_len)
{
struct icmp6hdr *icmp = icmp6_hdr(skb);
- int error = 0;
- int key_len;
/* The ICMPv6 type and code fields use the 16-bit transport port
* fields, so we need to store them in 16-bit network byte order.
*/
key->ipv6.tp.src = htons(icmp->icmp6_type);
key->ipv6.tp.dst = htons(icmp->icmp6_code);
- key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
if (icmp->icmp6_code == 0 &&
(icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -517,21 +729,17 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
struct nd_msg *nd;
int offset;
- key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
-
/* In order to process neighbor discovery options, we need the
* entire packet.
*/
if (unlikely(icmp_len < sizeof(*nd)))
- goto out;
- if (unlikely(skb_linearize(skb))) {
- error = -ENOMEM;
- goto out;
- }
+ return 0;
+
+ if (unlikely(skb_linearize(skb)))
+ return -ENOMEM;
nd = (struct nd_msg *)skb_transport_header(skb);
key->ipv6.nd.target = nd->target;
- key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
icmp_len -= sizeof(*nd);
offset = 0;
@@ -541,7 +749,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
int opt_len = nd_opt->nd_opt_len * 8;
if (unlikely(!opt_len || opt_len > icmp_len))
- goto invalid;
+ return 0;
/* Store the link layer address if the appropriate
* option is provided. It is considered an error if
@@ -566,16 +774,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
}
}
- goto out;
+ return 0;
invalid:
memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
-out:
- *key_lenp = key_len;
- return error;
+ return 0;
}
/**
@@ -584,7 +790,6 @@ out:
* Ethernet header
* @in_port: port number on which @skb was received.
* @key: output flow key
- * @key_lenp: length of output flow key
*
* The caller must ensure that skb->len >= ETH_HLEN.
*
@@ -602,11 +807,9 @@ out:
* of a correct length, otherwise the same as skb->network_header.
* For other key->eth.type values it is left untouched.
*/
-int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
- int *key_lenp)
+int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
{
- int error = 0;
- int key_len = SW_FLOW_KEY_OFFSET(eth);
+ int error;
struct ethhdr *eth;
memset(key, 0, sizeof(*key));
@@ -649,15 +852,13 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
struct iphdr *nh;
__be16 offset;
- key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
-
error = check_iphdr(skb);
if (unlikely(error)) {
if (error == -EINVAL) {
skb->transport_header = skb->network_header;
error = 0;
}
- goto out;
+ return error;
}
nh = ip_hdr(skb);
@@ -671,7 +872,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
offset = nh->frag_off & htons(IP_OFFSET);
if (offset) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
- goto out;
+ return 0;
}
if (nh->frag_off & htons(IP_MF) ||
skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
@@ -679,21 +880,18 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
/* Transport layer. */
if (key->ip.proto == IPPROTO_TCP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv4.tp.src = tcp->source;
key->ipv4.tp.dst = tcp->dest;
}
} else if (key->ip.proto == IPPROTO_UDP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv4.tp.src = udp->source;
key->ipv4.tp.dst = udp->dest;
}
} else if (key->ip.proto == IPPROTO_ICMP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (icmphdr_ok(skb)) {
struct icmphdr *icmp = icmp_hdr(skb);
/* The ICMP type and code fields use the 16-bit
@@ -722,53 +920,49 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
- key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
}
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
- nh_len = parse_ipv6hdr(skb, key, &key_len);
+ nh_len = parse_ipv6hdr(skb, key);
if (unlikely(nh_len < 0)) {
- if (nh_len == -EINVAL)
+ if (nh_len == -EINVAL) {
skb->transport_header = skb->network_header;
- else
+ error = 0;
+ } else {
error = nh_len;
- goto out;
+ }
+ return error;
}
if (key->ip.frag == OVS_FRAG_TYPE_LATER)
- goto out;
+ return 0;
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
key->ip.frag = OVS_FRAG_TYPE_FIRST;
/* Transport layer. */
if (key->ip.proto == NEXTHDR_TCP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv6.tp.src = tcp->source;
key->ipv6.tp.dst = tcp->dest;
}