summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-09-24 19:54:40 -0700
committerDavid S. Miller <davem@davemloft.net>2020-09-24 19:54:40 -0700
commit075c156850f6915ff5d040917043c08d679f4539 (patch)
treee5650688ad0cecab9e6742dd7bbcd5f522a5b992
parent54ce00ae361a12a3e4801c937a1b19d52f8c2181 (diff)
parent987cd5f049a2b5ed46901f6a874040a08d21d31f (diff)
Merge tag 'mlx5-updates-2020-09-22' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says: ==================== mlx5-updates-2020-09-22 This series includes mlx5 updates 1) Add support for Connection Tracking offload in NIC mode. Supporting CT offload in NIC mode on Mellanox cards is useful for scenarios where the dual port NIC serves as a gateway between 2 networks and forwards traffic between these networks. Since the traffic is not terminated on the host in this case, no use of SRIOV VFs and/or switchdev mode is required. Today Mellanox NIC cards already support offloading of packet forwarding between physical ports without going to the host so combining it with CT offloading allows users to create a gateway with forwarding and CT (Including NAT) offloading capabilities in non-switchdev mode. To support connection tracking in non-Switchdev mode (Single NIC mode), we need to make use of the current Connection tracking infrastructure implemented on top of E-Switch and the mlx5 generic flow table chains APIs, to make it work on non-Eswitch steering domain e.g. NIC RX domain, the following was performed: 1.1) Refactor current flow steering chains infrastructure and updates TC nic mode implementation to use flow table chains. 1.2) Refactor current Connection Tracking (CT) infrastructure to not assume E-switch backend, and make the CT layer agnostic to underlying steering mode (E-Switch/NIC) 1.3) Plumbing to support CT offload in NIC mode. 2) Trivial code cleanups. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ecpf.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c525
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h75
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c865
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h97
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c944
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h68
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c309
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c911
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h93
21 files changed, 2339 insertions, 1658 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 0b3eaa102751..9826a041e407 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -37,7 +37,7 @@ mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \
en_rep.o en/rep/bond.o en/mod_hdr.o
mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
- en/mapping.o esw/chains.o en/tc_tun.o \
+ en/mapping.o lib/fs_chains.o en/tc_tun.o \
en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o
mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
index a894ea98c95a..3dc9dd3f24dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -43,19 +43,13 @@ static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev)
int mlx5_ec_init(struct mlx5_core_dev *dev)
{
- int err = 0;
-
if (!mlx5_core_is_ecpf(dev))
return 0;
/* ECPF shall enable HCA for peer PF in the same way a PF
* does this for its VFs.
*/
- err = mlx5_peer_pf_init(dev);
- if (err)
- return err;
-
- return 0;
+ return mlx5_peer_pf_init(dev);
}
void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 6fdcd5e69476..6a97452dc60e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -12,9 +12,12 @@ enum {
};
struct mlx5e_tc_table {
- /* protects flow table */
+ /* Protects the dynamic assignment of the t parameter
+ * which is the nic tc root table.
+ */
struct mutex t_lock;
struct mlx5_flow_table *t;
+ struct mlx5_fs_chains *chains;
struct rhashtable ht;
@@ -24,6 +27,8 @@ struct mlx5e_tc_table {
struct notifier_block netdevice_nb;
struct netdev_net_notifier netdevice_nn;
+
+ struct mlx5_tc_ct_priv *ct;
};
struct mlx5e_flow_table {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 79cc42d88eec..e36e505d38ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -12,7 +12,7 @@
#include "neigh.h"
#include "en_rep.h"
#include "eswitch.h"
-#include "esw/chains.h"
+#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mapping.h"
#include "en/tc_tun.h"
@@ -191,7 +191,7 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
case TC_SETUP_CLSFLOWER:
memcpy(&tmp, f, sizeof(*f));
- if (!mlx5_esw_chains_prios_supported(esw))
+ if (!mlx5_chains_prios_supported(esw_chains(esw)))
return -EOPNOTSUPP;
/* Re-use tc offload path by moving the ft flow to the
@@ -203,12 +203,12 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
*
* We only support chain 0 of FT offload.
*/
- if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw))
+ if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)))
return -EOPNOTSUPP;
if (tmp.common.chain_index != 0)
return -EOPNOTSUPP;
- tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
+ tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
tmp.common.prio++;
err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
@@ -378,12 +378,12 @@ static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
*
* We only support chain 0 of FT offload.
*/
- if (!mlx5_esw_chains_prios_supported(esw) ||
- tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) ||
+ if (!mlx5_chains_prios_supported(esw_chains(esw)) ||
+ tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) ||
tmp.common.chain_index)
return -EOPNOTSUPP;
- tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
+ tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
tmp.common.prio++;
err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
@@ -612,7 +612,6 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
struct tc_skb_ext *tc_skb_ext;
struct mlx5_eswitch *esw;
struct mlx5e_priv *priv;
- int tunnel_moffset;
int err;
reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
@@ -626,7 +625,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
priv = netdev_priv(skb->dev);
esw = priv->mdev->priv.eswitch;
- err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain);
+ err = mlx5_get_chain_for_tag(esw_chains(esw), reg_c0, &chain);
if (err) {
netdev_dbg(priv->netdev,
"Couldn't find chain for chain tag: %d, err: %d\n",
@@ -647,13 +646,12 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
- if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb,
+ if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
zone_restore_id))
return false;
}
- tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset;
- tunnel_id = reg_c1 >> (8 * tunnel_moffset);
+ tunnel_id = reg_c1 >> REG_MAPPING_SHIFT(TUNNEL_TO_REG);
return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
#endif /* CONFIG_NET_TC_SKB_EXT */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index bc5f72ec3623..b5f8ed30047b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -14,7 +14,7 @@
#include <linux/workqueue.h>
#include <linux/xarray.h>
-#include "esw/chains.h"
+#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
@@ -39,8 +39,9 @@
netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
struct mlx5_tc_ct_priv {
- struct mlx5_eswitch *esw;
+ struct mlx5_core_dev *dev;
const struct net_device *netdev;
+ struct mod_hdr_tbl *mod_hdr_tbl;
struct idr fte_ids;
struct xarray tuple_ids;
struct rhashtable zone_ht;
@@ -50,13 +51,16 @@ struct mlx5_tc_ct_priv {
struct mlx5_flow_table *ct_nat;
struct mlx5_flow_table *post_ct;
struct mutex control_lock; /* guards parallel adds/dels */
+ struct mutex shared_counter_lock;
struct mapping_ctx *zone_mapping;
struct mapping_ctx *labels_mapping;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_fs_chains *chains;
};
struct mlx5_ct_flow {
- struct mlx5_esw_flow_attr pre_ct_attr;
- struct mlx5_esw_flow_attr post_ct_attr;
+ struct mlx5_flow_attr *pre_ct_attr;
+ struct mlx5_flow_attr *post_ct_attr;
struct mlx5_flow_handle *pre_ct_rule;
struct mlx5_flow_handle *post_ct_rule;
struct mlx5_ct_ft *ft;
@@ -67,12 +71,12 @@ struct mlx5_ct_flow {
struct mlx5_ct_zone_rule {
struct mlx5_flow_handle *rule;
struct mlx5e_mod_hdr_handle *mh;
- struct mlx5_esw_flow_attr attr;
+ struct mlx5_flow_attr *attr;
bool nat;
};
struct mlx5_tc_ct_pre {
- struct mlx5_flow_table *fdb;
+ struct mlx5_flow_table *ft;
struct mlx5_flow_group *flow_grp;
struct mlx5_flow_group *miss_grp;
struct mlx5_flow_handle *flow_rule;
@@ -114,11 +118,16 @@ struct mlx5_ct_tuple {
u16 zone;
};
+struct mlx5_ct_shared_counter {
+ struct mlx5_fc *counter;
+ refcount_t refcount;
+};
+
struct mlx5_ct_entry {
struct rhash_head node;
struct rhash_head tuple_node;
struct rhash_head tuple_nat_node;
- struct mlx5_fc *counter;
+ struct mlx5_ct_shared_counter *shared_counter;
unsigned long cookie;
unsigned long restore_cookie;
struct mlx5_ct_tuple tuple;
@@ -157,18 +166,6 @@ static const struct rhashtable_params tuples_nat_ht_params = {
.min_size = 16 * 1024,
};
-static struct mlx5_tc_ct_priv *
-mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_rep_uplink_priv *uplink_priv;
- struct mlx5e_rep_priv *uplink_rpriv;
-
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- uplink_priv = &uplink_rpriv->uplink_priv;
- return uplink_priv->ct_priv;
-}
-
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
@@ -395,20 +392,30 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
}
static void
+mlx5_tc_ct_shared_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
+{
+ if (!refcount_dec_and_test(&entry->shared_counter->refcount))
+ return;
+
+ mlx5_fc_destroy(ct_priv->dev, entry->shared_counter->counter);
+ kfree(entry->shared_counter);
+}
+
+static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_ct_entry *entry,
bool nat)
{
struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
- struct mlx5_esw_flow_attr *attr = &zone_rule->attr;
- struct mlx5_eswitch *esw = ct_priv->esw;
+ struct mlx5_flow_attr *attr = zone_rule->attr;
ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
- mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr);
- mlx5e_mod_hdr_detach(ct_priv->esw->dev,
- &esw->offloads.mod_hdr, zone_rule->mh);
+ mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
+ mlx5e_mod_hdr_detach(ct_priv->dev,
+ ct_priv->mod_hdr_tbl, zone_rule->mh);
mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
+ kfree(attr);
}
static void
@@ -417,8 +424,6 @@ mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
{
mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
-
- mlx5_fc_destroy(ct_priv->esw->dev, entry->counter);
}
static struct flow_action_entry *
@@ -444,29 +449,40 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
u32 labels_id,
u8 zone_restore_id)
{
- struct mlx5_eswitch *esw = ct_priv->esw;
+ enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
+ struct mlx5_core_dev *dev = ct_priv->dev;
int err;
- err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
CTSTATE_TO_REG, ct_state);
if (err)
return err;
- err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
MARK_TO_REG, mark);
if (err)
return err;
- err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
LABELS_TO_REG, labels_id);
if (err)
return err;
- err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
ZONE_RESTORE_TO_REG, zone_restore_id);
if (err)
return err;
+ /* Make another copy of zone id in reg_b for
+ * NIC rx flows since we don't copy reg_c1 to
+ * reg_b upon miss.
+ */
+ if (ns != MLX5_FLOW_NAMESPACE_FDB) {
+ err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
+ NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
+ if (err)
+ return err;
+ }
return 0;
}
@@ -547,7 +563,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
struct flow_action *flow_action = &flow_rule->action;
- struct mlx5_core_dev *mdev = ct_priv->esw->dev;
+ struct mlx5_core_dev *mdev = ct_priv->dev;
struct flow_action_entry *act;
size_t action_size;
char *modact;
@@ -558,8 +574,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_MANGLE: {
- err = alloc_mod_hdr_actions(mdev,
- MLX5_FLOW_NAMESPACE_FDB,
+ err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
mod_acts);
if (err)
return err;
@@ -588,7 +603,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_attr *attr,
struct flow_rule *flow_rule,
struct mlx5e_mod_hdr_handle **mh,
u8 zone_restore_id, bool nat)
@@ -624,9 +639,9 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
if (err)
goto err_mapping;
- *mh = mlx5e_mod_hdr_attach(ct_priv->esw->dev,
- &ct_priv->esw->offloads.mod_hdr,
- MLX5_FLOW_NAMESPACE_FDB,
+ *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
+ ct_priv->mod_hdr_tbl,
+ ct_priv->ns_type,
&mod_acts);
if (IS_ERR(*mh)) {
err = PTR_ERR(*mh);
@@ -650,9 +665,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
bool nat, u8 zone_restore_id)
{
struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
- struct mlx5_esw_flow_attr *attr = &zone_rule->attr;
- struct mlx5_eswitch *esw = ct_priv->esw;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
struct mlx5_flow_spec *spec = NULL;
+ struct mlx5_flow_attr *attr;
int err;
zone_rule->nat = nat;
@@ -661,6 +676,12 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
if (!spec)
return -ENOMEM;
+ attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!attr) {
+ err = -ENOMEM;
+ goto err_attr;
+ }
+
err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
&zone_rule->mh,
zone_restore_id, nat);
@@ -674,9 +695,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
MLX5_FLOW_CONTEXT_ACTION_COUNT;
attr->dest_chain = 0;
attr->dest_ft = ct_priv->post_ct;
- attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct;
+ attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
attr->outer_match_level = MLX5_MATCH_L4;
- attr->counter = entry->counter;
+ attr->counter = entry->shared_counter->counter;
attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
@@ -684,39 +705,98 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
entry->tuple.zone & MLX5_CT_ZONE_MASK,
MLX5_CT_ZONE_MASK);
- zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+ zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
if (IS_ERR(zone_rule->rule)) {
err = PTR_ERR(zone_rule->rule);
ct_dbg("Failed to add ct entry rule, nat: %d", nat);
goto err_rule;
}
+ zone_rule->attr = attr;
+
kfree(spec);
ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
return 0;
err_rule:
- mlx5e_mod_hdr_detach(ct_priv->esw->dev,
- &esw->offloads.mod_hdr, zone_rule->mh);
+ mlx5e_mod_hdr_detach(ct_priv->dev,
+ ct_priv->mod_hdr_tbl, zone_rule->mh);
mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
err_mod_hdr:
+ kfree(attr);
+err_attr:
kfree(spec);
return err;
}
+static struct mlx5_ct_shared_counter *
+mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry)
+{
+ struct mlx5_ct_tuple rev_tuple = entry->tuple;
+ struct mlx5_ct_shared_counter *shared_counter;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ struct mlx5_ct_entry *rev_entry;
+ __be16 tmp_port;
+
+ /* get the reversed tuple */
+ tmp_port = rev_tuple.port.src;
+ rev_tuple.port.src = rev_tuple.port.dst;
+ rev_tuple.port.dst = tmp_port;
+
+ if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ __be32 tmp_addr = rev_tuple.ip.src_v4;
+
+ rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
+ rev_tuple.ip.dst_v4 = tmp_addr;
+ } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
+
+ rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
+ rev_tuple.ip.dst_v6 = tmp_addr;
+ } else {
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ /* Use the same counter as the reverse direction */
+ mutex_lock(&ct_priv->shared_counter_lock);
+ rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple,
+ tuples_ht_params);
+ if (rev_entry) {
+ if (refcount_inc_not_zero(&rev_entry->shared_counter->refcount)) {
+ mutex_unlock(&ct_priv->shared_counter_lock);
+ return rev_entry->shared_counter;
+ }
+ }
+ mutex_unlock(&ct_priv->shared_counter_lock);
+
+ shared_counter = kzalloc(sizeof(*shared_counter), GFP_KERNEL);
+ if (!shared_counter)
+ return ERR_PTR(-ENOMEM);
+
+ shared_counter->counter = mlx5_fc_create(dev, true);
+ if (IS_ERR(shared_counter->counter)) {
+ ct_dbg("Failed to create counter for ct entry");
+ kfree(shared_counter);
+ return ERR_PTR(PTR_ERR(shared_counter->counter));
+ }
+
+ refcount_set(&shared_counter->refcount, 1);
+ return shared_counter;
+}
+
static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
struct flow_rule *flow_rule,
struct mlx5_ct_entry *entry,
u8 zone_restore_id)
{
- struct mlx5_eswitch *esw = ct_priv->esw;
int err;
- entry->counter = mlx5_fc_create(esw->dev, true);
- if (IS_ERR(entry->counter)) {
- err = PTR_ERR(entry->counter);
+ entry->shared_counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
+ if (IS_ERR(entry->shared_counter)) {
+ err = PTR_ERR(entry->shared_counter);
ct_dbg("Failed to create counter for ct entry");
return err;
}
@@ -736,7 +816,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
err_nat:
mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
- mlx5_fc_destroy(esw->dev, entry->counter);
+ mlx5_tc_ct_shared_counter_put(ct_priv, entry);
return err;
}
@@ -826,12 +906,16 @@ mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_ct_entry *entry)
{
mlx5_tc_ct_entry_del_rules(ct_priv, entry);
+ mutex_lock(&ct_priv->shared_counter_lock);
if (entry->tuple_node.next)
rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
&entry->tuple_nat_node,
tuples_nat_ht_params);
rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
tuples_ht_params);
+ mutex_unlock(&ct_priv->shared_counter_lock);
+ mlx5_tc_ct_shared_counter_put(ct_priv, entry);
+
}
static int
@@ -868,7 +952,7 @@ mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
if (!entry)
return -ENOENT;
- mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse);
+ mlx5_fc_query_cached(entry->shared_counter->counter, &bytes, &packets, &lastuse);
flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
FLOW_ACTION_HW_STATS_DELAYED);
@@ -941,9 +1025,7 @@ out:
return false;
}
-int
-mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec)
+int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
{
u32 ctstate = 0, ctstate_mask = 0;
@@ -959,24 +1041,21 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv,
return 0;
}
-void mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr)
+void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
{
- struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
-
- if (!ct_priv || !ct_attr->ct_labels_id)
+ if (!priv || !ct_attr->ct_labels_id)
return;
- mapping_remove(ct_priv->labels_mapping, ct_attr->ct_labels_id);
+ mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id);
}
int
-mlx5_tc_ct_match_add(struct mlx5e_priv *priv,
+mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
struct mlx5_ct_attr *ct_attr,
struct netlink_ext_ack *extack)
{
- struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_dissector_key_ct *mask, *key;
bool trk, est, untrk, unest, new;
@@ -989,7 +1068,7 @@ mlx5_tc_ct_match_add(struct mlx5e_priv *priv,
if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
return 0;
- if (!ct_priv) {
+ if (!priv) {
NL_SET_ERR_MSG_MOD(extack,
"offload of ct matching isn't available");
return -EOPNOTSUPP;
@@ -1045,7 +1124,7 @@ mlx5_tc_ct_match_add(struct mlx5e_priv *priv,
ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
- if (mapping_add(ct_priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
+ if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
return -EOPNOTSUPP;
mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
MLX5_CT_LABELS_MASK);
@@ -1055,14 +1134,12 @@ mlx5_tc_ct_match_add(struct mlx5e_priv *priv,
}
int
-mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
- struct mlx5_esw_flow_attr *attr,
+mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
+ struct mlx5_flow_attr *attr,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack)
{
- struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
-
- if (!ct_priv) {
+ if (!priv) {
NL_SET_ERR_MSG_MOD(extack,
"offload of ct action isn't available");
return -EOPNOTSUPP;
@@ -1081,8 +1158,8 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
{
struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
- struct mlx5_core_dev *dev = ct_priv->esw->dev;
- struct mlx5_flow_table *fdb = pre_ct->fdb;
+ struct mlx5_core_dev *dev = ct_priv->dev;
+ struct mlx5_flow_table *ft = pre_ct->ft;
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act flow_act = {};
struct mlx5_modify_hdr *mod_hdr;
@@ -1097,14 +1174,14 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
return -ENOMEM;
zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
- err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone);
+ err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
+ ZONE_TO_REG, zone);
if (err) {
ct_dbg("Failed to set zone register mapping");
goto err_mapping;
}
- mod_hdr = mlx5_modify_header_alloc(dev,
- MLX5_FLOW_NAMESPACE_FDB,
+ mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
pre_mod_acts.num_actions,
pre_mod_acts.actions);
@@ -1130,7 +1207,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
dest.ft = ct_priv->post_ct;
- rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
ct_dbg("Failed to add pre ct flow rule zone %d", zone);
@@ -1141,7 +1218,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
/* add miss rule */
memset(spec, 0, sizeof(*spec));
dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
- rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
ct_dbg("Failed to add pre ct miss rule zone %d", zone);
@@ -1168,7 +1245,7 @@ tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
struct mlx5_tc_ct_pre *pre_ct)
{
struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
- struct mlx5_core_dev *dev = ct_priv->esw->dev;
+ struct mlx5_core_dev *dev = ct_priv->dev;
mlx5_del_flow_rules(pre_ct->flow_rule);
mlx5_del_flow_rules(pre_ct->miss_rule);
@@ -1182,7 +1259,7 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
- struct mlx5_core_dev *dev = ct_priv->esw->dev;
+ struct mlx5_core_dev *dev = ct_priv->dev;
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *ns;
struct mlx5_flow_table *ft;
@@ -1192,10 +1269,10 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
void *misc;
int err;
- ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
if (!ns) {
err = -EOPNOTSUPP;
- ct_dbg("Failed to get FDB flow namespace");
+ ct_dbg("Failed to get flow namespace");
return err;
}
@@ -1204,7 +1281,8 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
return -ENOMEM;
ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
- ft_attr.prio = FDB_TC_OFFLOAD;
+ ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
+ FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
ft_attr.max_fte = 2;
ft_attr.level = 1;
ft = mlx5_create_flow_table(ns, &ft_attr);
@@ -1213,7 +1291,7 @@ mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
ct_dbg("Failed to create pre ct table");
goto out_free;
}
- pre_ct->fdb = ft;
+ pre_ct->ft = ft;
/* create flow group */
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
@@ -1277,7 +1355,7 @@ mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
mlx5_destroy_flow_group(pre_ct->miss_grp);
mlx5_destroy_flow_group(pre_ct->flow_grp);
- mlx5_destroy_flow_table(pre_ct->fdb);
+ mlx5_destroy_flow_table(pre_ct->ft);
}
static int
@@ -1396,7 +1474,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
/* We translate the tc filter with CT action to the following HW model:
*
* +---------------------+
- * + fdb prio (tc chain) +
+ * + ft prio (tc chain) +
* + original match +
* +---------------------+
* | set chain miss mapping
@@ -1426,17 +1504,17 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* +--------------+
*/
static struct mlx5_flow_handle *
-__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
+__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *orig_spec,
- struct mlx5_esw_flow_attr *attr)
+ struct mlx5_flow_attr *attr)
{
- struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+ struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
+ u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
struct mlx5_flow_spec *post_ct_spec = NULL;
- struct mlx5_eswitch *esw = ct_priv->esw;
- struct mlx5_esw_flow_attr *pre_ct_attr;
+ struct mlx5_flow_attr *pre_ct_attr;
struct mlx5_modify_hdr *mod_hdr;
struct mlx5_flow_handle *rule;
struct mlx5_ct_flow *ct_flow;
@@ -1471,10 +1549,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
}
ct_flow->fte_id = fte_id;
- /* Base esw attributes of both rules on original rule attribute */
- pre_ct_attr = &ct_flow->pre_ct_attr;
- memcpy(pre_ct_attr, attr, sizeof(*attr));
- memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr));
+ /* Base flow attributes of both rules on original rule attribute */
+ ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!ct_flow->pre_ct_attr) {
+ err = -ENOMEM;
+ goto err_alloc_pre;
+ }
+
+ ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
+ if (!ct_flow->post_ct_attr) {
+ err = -ENOMEM;
+ goto err_alloc_post;
+ }
+
+ pre_ct_attr = ct_flow->pre_ct_attr;
+ memcpy(pre_ct_attr, attr, attr_sz);
+ memcpy(ct_flow->post_ct_attr, attr, attr_sz);
/* Modify the original rule's action to fwd and modify, leave decap */
pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
@@ -1485,22 +1575,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
* don't go though all prios of this chain as normal tc rules
* miss.
*/
- err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain,
- &chain_mapping);
+ err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
+ &chain_mapping);
if (err) {
ct_dbg("Failed to get chain register mapping for chain");
goto err_get_chain;
}
ct_flow->chain_mapping = chain_mapping;
- err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
CHAIN_TO_REG, chain_mapping);
if (err) {
ct_dbg("Failed to set chain register mapping");
goto err_mapping;
}
- err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
FTEID_TO_REG, fte_id);
if (err) {
ct_dbg("Failed to set fte_id register mapping");
@@ -1514,7 +1604,8 @@ __mlx5_tc_ct_flow_offload(