summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-01-06 19:48:38 -0500
committerDavid S. Miller <davem@davemloft.net>2014-01-06 19:48:38 -0500
commit39b6b2992f9dc65d1de5c66e7ec2271b8a5fac33 (patch)
treec0fc4e2be0429bb4d7643e6b6f8f5a56212f9284 /net
parent56a4342dfe3145cd66f766adccb28fd9b571606d (diff)
parent443cd88c8a31379e95326428bbbd40af25c1d440 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch
Jesse Gross says: ==================== [GIT net-next] Open vSwitch Open vSwitch changes for net-next/3.14. Highlights are: * Performance improvements in the mechanism to get packets to userspace using memory mapped netlink and skb zero copy where appropriate. * Per-cpu flow stats in situations where flows are likely to be shared across CPUs. Standard flow stats are used in other situations to save memory and allocation time. * A handful of code cleanups and rationalization. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/core/skbuff.c85
-rw-r--r--net/netfilter/nfnetlink_queue_core.c59
-rw-r--r--net/netlink/af_netlink.c4
-rw-r--r--net/netlink/genetlink.c21
-rw-r--r--net/openvswitch/datapath.c231
-rw-r--r--net/openvswitch/datapath.h6
-rw-r--r--net/openvswitch/flow.c96
-rw-r--r--net/openvswitch/flow.h33
-rw-r--r--net/openvswitch/flow_netlink.c66
-rw-r--r--net/openvswitch/flow_netlink.h1
-rw-r--r--net/openvswitch/flow_table.c60
-rw-r--r--net/openvswitch/flow_table.h6
-rw-r--r--net/openvswitch/vport.c6
-rw-r--r--net/openvswitch/vport.h1
14 files changed, 463 insertions, 212 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index eb96c2c22400..1d641e781f85 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2121,6 +2121,91 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
}
EXPORT_SYMBOL(skb_copy_and_csum_bits);
+ /**
+ * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
+ * @from: source buffer
+ *
+ * Calculates the amount of linear headroom needed in the 'to' skb passed
+ * into skb_zerocopy().
+ */
+unsigned int
+skb_zerocopy_headlen(const struct sk_buff *from)
+{
+ unsigned int hlen = 0;
+
+ if (!from->head_frag ||
+ skb_headlen(from) < L1_CACHE_BYTES ||
+ skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ hlen = skb_headlen(from);
+
+ if (skb_has_frag_list(from))
+ hlen = from->len;
+
+ return hlen;
+}
+EXPORT_SYMBOL_GPL(skb_zerocopy_headlen);
+
+/**
+ * skb_zerocopy - Zero copy skb to skb
+ * @to: destination buffer
+ * @source: source buffer
+ * @len: number of bytes to copy from source buffer
+ * @hlen: size of linear headroom in destination buffer
+ *
+ * Copies up to `len` bytes from `from` to `to` by creating references
+ * to the frags in the source buffer.
+ *
+ * The `hlen` as calculated by skb_zerocopy_headlen() specifies the
+ * headroom in the `to` buffer.
+ */
+void
+skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
+{
+ int i, j = 0;
+ int plen = 0; /* length of skb->head fragment */
+ struct page *page;
+ unsigned int offset;
+
+ BUG_ON(!from->head_frag && !hlen);
+
+ /* dont bother with small payloads */
+ if (len <= skb_tailroom(to)) {
+ skb_copy_bits(from, 0, skb_put(to, len), len);
+ return;
+ }
+
+ if (hlen) {
+ skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
+ len -= hlen;
+ } else {
+ plen = min_t(int, skb_headlen(from), len);
+ if (plen) {
+ page = virt_to_head_page(from->head);
+ offset = from->data - (unsigned char *)page_address(page);
+ __skb_fill_page_desc(to, 0, page, offset, plen);
+ get_page(page);
+ j = 1;
+ len -= plen;
+ }
+ }
+
+ to->truesize += len + plen;
+ to->len += len + plen;
+ to->data_len += len + plen;
+
+ for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
+ if (!len)
+ break;
+ skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
+ skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
+ len -= skb_shinfo(to)->frags[j].size;
+ skb_frag_ref(to, j);
+ j++;
+ }
+ skb_shinfo(to)->nr_frags = j;
+}
+EXPORT_SYMBOL_GPL(skb_zerocopy);
+
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
__wsum csum;
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index b5e1f82890df..f072fe803510 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -236,51 +236,6 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
spin_unlock_bh(&queue->lock);
}
-static void
-nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
-{
- int i, j = 0;
- int plen = 0; /* length of skb->head fragment */
- struct page *page;
- unsigned int offset;
-
- /* dont bother with small payloads */
- if (len <= skb_tailroom(to)) {
- skb_copy_bits(from, 0, skb_put(to, len), len);
- return;
- }
-
- if (hlen) {
- skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
- len -= hlen;
- } else {
- plen = min_t(int, skb_headlen(from), len);
- if (plen) {
- page = virt_to_head_page(from->head);
- offset = from->data - (unsigned char *)page_address(page);
- __skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
- j = 1;
- len -= plen;
- }
- }
-
- to->truesize += len + plen;
- to->len += len + plen;
- to->data_len += len + plen;
-
- for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
- if (!len)
- break;
- skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
- skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
- len -= skb_shinfo(to)->frags[j].size;
- skb_frag_ref(to, j);
- j++;
- }
- skb_shinfo(to)->nr_frags = j;
-}
-
static int
nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
bool csum_verify)
@@ -330,7 +285,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
{
size_t size;
size_t data_len = 0, cap_len = 0;
- int hlen = 0;
+ unsigned int hlen = 0;
struct sk_buff *skb;
struct nlattr *nla;
struct nfqnl_msg_packet_hdr *pmsg;
@@ -382,14 +337,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (data_len > entskb->len)
data_len = entskb->len;
- if (!entskb->head_frag ||
- skb_headlen(entskb) < L1_CACHE_BYTES ||
- skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS)
- hlen = skb_headlen(entskb);
-
- if (skb_has_frag_list(entskb))
- hlen = entskb->len;
- hlen = min_t(int, data_len, hlen);
+ hlen = skb_zerocopy_headlen(entskb);
+ hlen = min_t(unsigned int, hlen, data_len);
size += sizeof(struct nlattr) + hlen;
cap_len = entskb->len;
break;
@@ -539,7 +488,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
nla->nla_type = NFQA_PAYLOAD;
nla->nla_len = nla_attr_size(data_len);
- nfqnl_zcopy(skb, entskb, data_len, hlen);
+ skb_zerocopy(skb, entskb, data_len, hlen);
}
nlh->nlmsg_len = skb->len;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b077b90c1254..34a656d90175 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1773,6 +1773,9 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
if (ring->pg_vec == NULL)
goto out_put;
+ if (ring->frame_size - NL_MMAP_HDRLEN < size)
+ goto out_put;
+
skb = alloc_skb_head(gfp_mask);
if (skb == NULL)
goto err1;
@@ -1782,6 +1785,7 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
if (ring->pg_vec == NULL)
goto out_free;
+ /* check again under lock */
maxlen = ring->frame_size - NL_MMAP_HDRLEN;
if (maxlen < size)
goto out_free;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 713671ae45af..b1dcdb932a86 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -461,6 +461,26 @@ int genl_unregister_family(struct genl_family *family)
EXPORT_SYMBOL(genl_unregister_family);
/**
+ * genlmsg_new_unicast - Allocate generic netlink message for unicast
+ * @payload: size of the message payload
+ * @info: information on destination
+ * @flags: the type of memory to allocate
+ *
+ * Allocates a new sk_buff large enough to cover the specified payload
+ * plus required Netlink headers. Will check receiving socket for
+ * memory mapped i/o capability and use it if enabled. Will fall back
+ * to non-mapped skb if message size exceeds the frame size of the ring.
+ */
+struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info,
+ gfp_t flags)
+{
+ size_t len = nlmsg_total_size(genlmsg_total_size(payload));
+
+ return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags);
+}
+EXPORT_SYMBOL_GPL(genlmsg_new_unicast);
+
+/**
* genlmsg_put - Add generic netlink header to netlink message
* @skb: socket buffer holding the message
* @portid: netlink portid the message is addressed to
@@ -600,6 +620,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
info.genlhdr = nlmsg_data(nlh);
info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
info.attrs = attrbuf;
+ info.dst_sk = skb->sk;
genl_info_net_set(&info, net);
memset(&info.user_ptr, 0, sizeof(info.user_ptr));
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6f5e1dd3be2d..df4692826ead 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -108,10 +108,9 @@ int lockdep_ovsl_is_held(void)
#endif
static struct vport *new_vport(const struct vport_parms *);
-static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
+static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
const struct dp_upcall_info *);
-static int queue_userspace_packet(struct net *, int dp_ifindex,
- struct sk_buff *,
+static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
const struct dp_upcall_info *);
/* Must be called with rcu_read_lock or ovs_mutex. */
@@ -133,7 +132,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
}
/* Must be called with rcu_read_lock or ovs_mutex. */
-const char *ovs_dp_name(const struct datapath *dp)
+static const char *ovs_dp_name(const struct datapath *dp)
{
struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
return vport->ops->get_name(vport);
@@ -234,7 +233,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
}
/* Look up flow. */
- flow = ovs_flow_tbl_lookup(&dp->table, &key, &n_mask_hit);
+ flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
@@ -251,9 +250,9 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
OVS_CB(skb)->flow = flow;
OVS_CB(skb)->pkt_key = &key;
- stats_counter = &stats->n_hit;
- ovs_flow_used(OVS_CB(skb)->flow, skb);
+ ovs_flow_stats_update(OVS_CB(skb)->flow, skb);
ovs_execute_actions(dp, skb);
+ stats_counter = &stats->n_hit;
out:
/* Update datapath statistics. */
@@ -277,7 +276,6 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
const struct dp_upcall_info *upcall_info)
{
struct dp_stats_percpu *stats;
- int dp_ifindex;
int err;
if (upcall_info->portid == 0) {
@@ -285,16 +283,10 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
goto err;
}
- dp_ifindex = get_dpifindex(dp);
- if (!dp_ifindex) {
- err = -ENODEV;
- goto err;
- }
-
if (!skb_is_gso(skb))
- err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
+ err = queue_userspace_packet(dp, skb, upcall_info);
else
- err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
+ err = queue_gso_packets(dp, skb, upcall_info);
if (err)
goto err;
@@ -310,8 +302,7 @@ err:
return err;
}
-static int queue_gso_packets(struct net *net, int dp_ifindex,
- struct sk_buff *skb,
+static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
const struct dp_upcall_info *upcall_info)
{
unsigned short gso_type = skb_shinfo(skb)->gso_type;
@@ -320,14 +311,14 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
struct sk_buff *segs, *nskb;
int err;
- segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
+ segs = __skb_gso_segment(skb, NETIF_F_SG, false);
if (IS_ERR(segs))
return PTR_ERR(segs);
/* Queue all of the segments. */
skb = segs;
do {
- err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
+ err = queue_userspace_packet(dp, skb, upcall_info);
if (err)
break;
@@ -380,11 +371,11 @@ static size_t key_attr_size(void)
+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
}
-static size_t upcall_msg_size(const struct sk_buff *skb,
- const struct nlattr *userdata)
+static size_t upcall_msg_size(const struct nlattr *userdata,
+ unsigned int hdrlen)
{
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
- + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
+ + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+ nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
/* OVS_PACKET_ATTR_USERDATA */
@@ -394,15 +385,24 @@ static size_t upcall_msg_size(const struct sk_buff *skb,
return size;
}
-static int queue_userspace_packet(struct net *net, int dp_ifindex,
- struct sk_buff *skb,
+static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
const struct dp_upcall_info *upcall_info)
{
struct ovs_header *upcall;
struct sk_buff *nskb = NULL;
struct sk_buff *user_skb; /* to be queued to userspace */
struct nlattr *nla;
- int err;
+ struct genl_info info = {
+ .dst_sk = ovs_dp_get_net(dp)->genl_sock,
+ .snd_portid = upcall_info->portid,
+ };
+ size_t len;
+ unsigned int hlen;
+ int err, dp_ifindex;
+
+ dp_ifindex = get_dpifindex(dp);
+ if (!dp_ifindex)
+ return -ENODEV;
if (vlan_tx_tag_present(skb)) {
nskb = skb_clone(skb, GFP_ATOMIC);
@@ -422,7 +422,22 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
goto out;
}
- user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
+ /* Complete checksum if needed */
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ (err = skb_checksum_help(skb)))
+ goto out;
+
+ /* Older versions of OVS user space enforce alignment of the last
+ * Netlink attribute to NLA_ALIGNTO which would require extensive
+ * padding logic. Only perform zerocopy if padding is not required.
+ */
+ if (dp->user_features & OVS_DP_F_UNALIGNED)
+ hlen = skb_zerocopy_headlen(skb);
+ else
+ hlen = skb->len;
+
+ len = upcall_msg_size(upcall_info->userdata, hlen);
+ user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
goto out;
@@ -441,26 +456,24 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
nla_len(upcall_info->userdata),
nla_data(upcall_info->userdata));
- nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
+ /* Only reserve room for attribute header, packet data is added
+ * in skb_zerocopy() */
+ if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
+ err = -ENOBUFS;
+ goto out;
+ }
+ nla->nla_len = nla_attr_size(skb->len);
- skb_copy_and_csum_dev(skb, nla_data(nla));
+ skb_zerocopy(user_skb, skb, skb->len, hlen);
- genlmsg_end(user_skb, upcall);
- err = genlmsg_unicast(net, user_skb, upcall_info->portid);
+ ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
+ err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
out:
kfree_skb(nskb);
return err;
}
-static void clear_stats(struct sw_flow *flow)
-{
- flow->used = 0;
- flow->tcp_flags = 0;
- flow->packet_count = 0;
- flow->byte_count = 0;
-}
-
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
struct ovs_header *ovs_header = info->userhdr;
@@ -499,7 +512,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
packet->protocol = htons(ETH_P_802_2);
/* Build an sw_flow for sending this packet. */
- flow = ovs_flow_alloc();
+ flow = ovs_flow_alloc(false);
err = PTR_ERR(flow);
if (IS_ERR(flow))
goto err_kfree_skb;
@@ -635,10 +648,10 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
const int skb_orig_len = skb->len;
struct nlattr *start;
struct ovs_flow_stats stats;
+ __be16 tcp_flags;
+ unsigned long used;
struct ovs_header *ovs_header;
struct nlattr *nla;
- unsigned long used;
- u8 tcp_flags;
int err;
ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
@@ -667,24 +680,17 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
nla_nest_end(skb, nla);
- spin_lock_bh(&flow->lock);
- used = flow->used;
- stats.n_packets = flow->packet_count;
- stats.n_bytes = flow->byte_count;
- tcp_flags = (u8)ntohs(flow->tcp_flags);
- spin_unlock_bh(&flow->lock);
-
+ ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
if (used &&
nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
goto nla_put_failure;
if (stats.n_packets &&
- nla_put(skb, OVS_FLOW_ATTR_STATS,
- sizeof(struct ovs_flow_stats), &stats))
+ nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
goto nla_put_failure;
- if (tcp_flags &&
- nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
+ if ((u8)ntohs(tcp_flags) &&
+ nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
goto nla_put_failure;
/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
@@ -701,8 +707,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
if (start) {
const struct sw_flow_actions *sf_acts;
- sf_acts = rcu_dereference_check(flow->sf_acts,
- lockdep_ovsl_is_held());
+ sf_acts = rcu_dereference_ovsl(flow->sf_acts);
err = ovs_nla_put_actions(sf_acts->actions,
sf_acts->actions_len, skb);
@@ -726,39 +731,34 @@ error:
return err;
}
-static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
+static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow,
+ struct genl_info *info)
{
- const struct sw_flow_actions *sf_acts;
+ size_t len;
- sf_acts = ovsl_dereference(flow->sf_acts);
+ len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts));
- return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
+ return genlmsg_new_unicast(len, info, GFP_KERNEL);
}
static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
struct datapath *dp,
- u32 portid, u32 seq, u8 cmd)
+ struct genl_info *info,
+ u8 cmd)
{
struct sk_buff *skb;
int retval;
- skb = ovs_flow_cmd_alloc_info(flow);
+ skb = ovs_flow_cmd_alloc_info(flow, info);
if (!skb)
return ERR_PTR(-ENOMEM);
- retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
+ retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid,
+ info->snd_seq, 0, cmd);
BUG_ON(retval < 0);
return skb;
}
-static struct sw_flow *__ovs_flow_tbl_lookup(struct flow_table *tbl,
- const struct sw_flow_key *key)
-{
- u32 __always_unused n_mask_hit;
-
- return ovs_flow_tbl_lookup(tbl, key, &n_mask_hit);
-}
-
static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
@@ -770,6 +770,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct sw_flow_actions *acts = NULL;
struct sw_flow_match match;
+ bool exact_5tuple;
int error;
/* Extract key. */
@@ -778,7 +779,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto error;
ovs_match_init(&match, &key, &mask);
- error = ovs_nla_get_match(&match,
+ error = ovs_nla_get_match(&match, &exact_5tuple,
a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
if (error)
goto error;
@@ -809,7 +810,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_ovs;
/* Check if this is a duplicate flow */
- flow = __ovs_flow_tbl_lookup(&dp->table, &key);
+ flow = ovs_flow_tbl_lookup(&dp->table, &key);
if (!flow) {
/* Bail out if we're not allowed to create a new flow. */
error = -ENOENT;
@@ -817,12 +818,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_ovs;
/* Allocate flow. */
- flow = ovs_flow_alloc();
+ flow = ovs_flow_alloc(!exact_5tuple);
if (IS_ERR(flow)) {
error = PTR_ERR(flow);
goto err_unlock_ovs;
}
- clear_stats(flow);
flow->key = masked_key;
flow->unmasked_key = key;
@@ -835,8 +835,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto err_flow_free;
}
- reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
- info->snd_seq, OVS_FLOW_CMD_NEW);
+ reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
} else {
/* We found a matching flow. */
struct sw_flow_actions *old_acts;
@@ -864,15 +863,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
rcu_assign_pointer(flow->sf_acts, acts);
ovs_nla_free_flow_actions(old_acts);
- reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
- info->snd_seq, OVS_FLOW_CMD_NEW);
+ reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
/* Clear stats. */
- if (a[OVS_FLOW_ATTR_CLEAR]) {
- spin_lock_bh(&flow->lock);
- clear_stats(flow);
- spin_unlock_bh(&flow->lock);
- }
+ if (a[OVS_FLOW_ATTR_CLEAR])
+ ovs_flow_stats_clear(flow);
}
ovs_unlock();
@@ -910,7 +905,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
}
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+ err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
return err;
@@ -921,14 +916,13 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- flow = __ovs_flow_tbl_lookup(&dp->table, &key);
+ flow = ovs_flow_tbl_lookup(&dp->table, &key);
if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
err = -ENOENT;
goto unlock;
}
- reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
- info->snd_seq, OVS_FLOW_CMD_NEW);
+ reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
goto unlock;
@@ -965,17 +959,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
}
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+ err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
goto unlock;
- flow = __ovs_flow_tbl_lookup(&dp->table, &key);
+ flow = ovs_flow_tbl_lookup(&dp->table, &key);
if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
err = -ENOENT;
goto unlock;
}
- reply = ovs_flow_cmd_alloc_info(flow);
+ reply = ovs_flow_cmd_alloc_info(flow, info);
if (!reply) {
err = -ENOMEM;
goto unlock;
@@ -1061,6 +1055,7 @@ static const struct genl_ops dp_flow_genl_ops[] = {
static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+ [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
};
static struct genl_family dp_datapath_genl_family = {
@@ -1119,6 +1114,9 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
&dp_megaflow_stats))
goto nla_put_failure;
+ if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
+ goto nla_put_failure;
+
return genlmsg_end(skb, ovs_header);
nla_put_failure:
@@ -1127,17 +1125,17 @@ error:
return -EMSGSIZE;
}
-static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
- u32 seq, u8 cmd)
+static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp,
+ struct genl_info *info, u8 cmd)
{
struct sk_buff *skb;
int retval;
- skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
+ skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
- retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
+ retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd);
if (retval < 0) {
kfree_skb(skb);
return ERR_PTR(retval);
@@ -1165,6 +1163,24 @@ static struct datapath *lookup_datapath(struct net *net,
return dp ? dp : ERR_PTR(-ENODEV);
}
+static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
+{
+ struct datapath *dp;
+
+ dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ if (!dp)
+ return;
+
+ WARN(dp->user_features, "Dropping previously announced user features\n");
+ dp->user_features = 0;
+}
+
+static void ovs_dp_change(struct datapath *dp, struct nlattr **a)
+{
+ if (a[OVS_DP_ATTR_USER_FEATURES])
+ dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+}
+
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
@@ -1223,17 +1239,27 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.port_no = OVSP_LOCAL;
parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
+ ovs_dp_change(dp, a);
+
vport = new_vport(&parms);
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
if (err == -EBUSY)
err = -EEXIST;
+ if (err == -EEXIST) {
+ /* An outdated user space instance that does not understand
+ * the concept of user_features has attempted to create a new
+ * datapath and is likely to reuse it. Drop all user features.
+ */
+ if (info->genlhdr->version < OVS_DP_VER_FEATURES)
+ ovs_dp_reset_user_features(skb, info);
+ }
+
goto err_destroy_ports_array;
}
- reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
- info->snd_seq, OVS_DP_CMD_NEW);
+ reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
err = PTR_ERR(reply);
if (IS_ERR(reply))
goto err_destroy_local_port;
@@ -1299,8 +1325,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(dp))
goto unlock;
- reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
- info->snd_seq, OVS_DP_CMD_DEL);
+ reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL);
err = PTR_ERR(reply);
if (IS_ERR(reply))
goto unlock;
@@ -1328,8 +1353,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(dp))
goto unlock;
- reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
- info->snd_seq, OVS_DP_CMD_NEW);
+ ovs_dp_change(dp, info->attrs);
+
+ reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0,
@@ -1360,8 +1386,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
- info->snd_seq, OVS_DP_CMD_NEW);
+ reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
goto unlock;
@@ -1441,7 +1466,7 @@ struct genl_family dp_vport_genl_family = {
.parallel_ops = true,
};
-struct genl_multicast_group ovs_dp_vport_multicast_group = {
+static struct genl_multicast_group ovs_dp_vport_multicast_group = {
.name = OVS_VPORT_MCGROUP
};
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 4067ea41be28..6be9fbb5e9cb 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -88,6 +88,8 @@ struct datapath {
/* Network namespace ref. */
struct net *net;
#endif
+
+ u32 user_features;
};
/**
@@ -145,6 +147,8 @@ int lockdep_ovsl_is_held(void);
#define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held()))
#define ovsl_dereference(p) \
rcu_dereference_protected(p, lockdep_ovsl_is_held())
+#define rcu_dereference_ovsl(p) \
+ rcu_dereference_check(p, lockdep_ovsl_is_held())
static inline struct net *ovs_dp_get_net(struct datapath *dp)
{
@@ -178,14 +182,12 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_family dp_vport_genl_family;
-extern struct genl_multicast_group ovs_dp_vport_multicast_group;
void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
const struct dp_upcall_info *);
-const char *ovs_dp_name(const struct datapath *dp);
struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
u8 cmd);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index b409f5279601..16f4b46161d4 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -35,6 +35,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/sctp.h>
+#include <linux/smp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
@@ -60,10 +61,16 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
-void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
+void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
{
+ struct flow_stats *stats;
__be16 tcp_flags = 0;
+ if (!flow->stats.is_percpu)
+ stats = flow->stats.stat;
+ else
+ stats = this_cpu_ptr(flow->stats.cpu_stats);
+
if ((flow->key.eth.type == htons(ETH_P_IP) ||
flow->key.eth.type == htons(ETH_P_IPV6)) &&
flow->key.ip.proto == IPPROTO_TCP &&
@@ -71,12 +78,87 @@ void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
}
- spin_lock(&flow->lock);
- flow->used = jiffies;
- flow->packet_count++;
- flow->byte_count += skb->len;
- flow->tcp_flags |= tcp_flags;
- spin_unlock(&flow->lock);
+ spin_lock(&stats->lock);
+ stats->used = jiffies;
+ stats->packet_count++;
+ stats->byte_count += skb->len;
+ stats->tcp_flags |= tcp_flags;
+ spin_unlock(&stats->lock);
+}
+
+static void stats_read(struct flow_stats *stats,
+ struct ovs_flow_stats *ovs_stats,
+ unsigned long *used, __be16 *tcp_flags)
+{
+ spin_lock(&stats->lock);
+ if (time_after(stats->used, *used))
+ *used = stats->used;
+ *tcp_flags |= stats->tcp_flags;
+ ovs_stats->n_packets += stats->packet_count;
+ ovs_stats->n_bytes += stats->byte_count;
+ spin_unlock(&stats->lock);
+}
+
+void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
+ unsigned long *used, __be16 *tcp_flags)
+{
+ int cpu, cur_cpu;
+
+ *used = 0;
+ *tcp_flags = 0;
+ memset(ovs_stats, 0, sizeof(*ovs_stats));
+
+ if (!flow->stats.is_percpu) {
+ stats_read(flow->stats.stat, ovs_stats, used, tcp_flags);
+ } else {
+ cur_cpu = get_cpu();
+ for_each_possible_cpu(cpu) {
+ struct flow_stats *stats;
+
+ if (cpu == cur_cpu)
+ local_bh_disable();
+
+ stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
+ stats_read(stats, ovs_stats, used, tcp_flags);
+
+ if (cpu == cur_cpu)
+ local_bh_enable();
+ }
+ put_cpu();
+ }
+}
+
+static void stats_reset(struct flow_stats *stats)
+{
+ spin_lock(&stats->lock);
+ stats->used = 0;
+ stats->packet_count = 0;
+ stats->byte_count = 0;
+ stats->tcp_flags = 0;
+ spin_unlock(&stats->lock);
+}
+
+void ovs_flow_stats_clear(struct sw_flow *flow)
+{
+ int cpu, cur_cpu;
+
+ if (!flow->stats.is_percpu) {
+ stats_reset(flow->stats.stat);
+ } else {
+ cur_cpu