summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-05-01 11:47:10 -0400
committerDavid S. Miller <davem@davemloft.net>2017-05-01 11:47:10 -0400
commitcedf90c0cc1250cfb95905b61dc36b37ec9d5395 (patch)
treef6ff272d003bf583adf36dd34b9ee12b66f65e95 /drivers
parent07ff2ed03bb874a5bb97361a5a07ee28f1afa574 (diff)
parent0a0ab1d2cc5d5e68191488235074b5b30d793bb7 (diff)
Merge tag 'mlx5-updates-2017-04-30' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
mlx5-updates-2017-04-30 Or says: ================ mlx5 neigh update This series (whose code name is 'neigh update') from Hadar, enhances the mlx5 TC IP tunnel offloads to deal with changes to tunnel destination neighbours used in offloaded flows which involved encapsulation. In order to keep track of the validity state of such neighbours, we register a netevent notifier callback and act on NEIGH_UPDATE events: if a neighbour becomes valid, offload the related flows to HW (the other way around when neigh becomes invalid) and similarly when a neigh MAC address changes. Since this traffic is offloaded from the host OS, the neighbour for the IP tunnel destination can mistakenly become STALE and be deleted by the kernel since its 'used' value wasn't changed. To address that, we proactively update the neighbour 'used' value every DELAY_PROBE_TIME seconds, using time stamps generated by the existing driver code for HW flow counters. We use the DELAY_PROBE_TIME_UPDATE event to adjust the frequency of the updates. Prior to the core of the series, there's a patch from Saeed that introduces an extendable vport representor implementation scheme. It provides a separation between the eswitch and the netdev related aspects of the representors. We would like to thank Ido Schimmel and Ilya Lesokhin for their coaching and advice through the long design and review cycles while we struggled to understand and (hopefully correctly) implement the locking around the different driver flows (...). - Or. ================= Misc Updates: From Tariq: Some small performance and trivial code optimizations for mlx5 netdev driver - Optimize poll ICOSQ completion queue - Use prefetchw when a write is to follow - Use u8 as ownership type in mlx5e_get_cqe() From Eran: - Disable LRO by default on specific setups From Eli: - Small cleanup for E-Switch to avoid redundant allocation Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c574
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h145
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c341
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c66
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c24
13 files changed, 1073 insertions, 262 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 632a04b0ecaf..0099a3e397bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -991,20 +991,6 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb);
-struct mlx5_eswitch_rep;
-int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
- struct mlx5_eswitch_rep *rep);
-void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
- struct mlx5_eswitch_rep *rep);
-int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep);
-void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
- struct mlx5_eswitch_rep *rep);
-int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
-void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
-int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
-void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
-void mlx5e_update_hw_rep_counters(struct mlx5e_priv *priv);
-
/* common netdev helpers */
int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
@@ -1031,12 +1017,6 @@ int mlx5e_open(struct net_device *netdev);
void mlx5e_update_stats_work(struct work_struct *work);
u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
-int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
- void *sp);
-bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id);
-
-bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
-
/* mlx5e generic netdev management API */
struct net_device*
mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e43411d232ee..a61b71b6fff3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -35,9 +35,10 @@
#include <linux/mlx5/fs.h>
#include <net/vxlan.h>
#include <linux/bpf.h>
+#include "eswitch.h"
#include "en.h"
#include "en_tc.h"
-#include "eswitch.h"
+#include "en_rep.h"
#include "vxlan.h"
struct mlx5e_rq_param {
@@ -3784,6 +3785,12 @@ static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw)
(pci_bw < 40000) && (pci_bw < link_speed));
}
+static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw)
+{
+ return !(link_speed && pci_bw &&
+ (pci_bw <= 16000) && (pci_bw < link_speed));
+}
+
void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
{
params->rx_cq_period_mode = cq_period_mode;
@@ -3828,6 +3835,11 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
params->num_channels = max_channels;
params->num_tc = 1;
+ mlx5e_get_max_linkspeed(mdev, &link_speed);
+ mlx5e_get_pci_bw(mdev, &pci_bw);
+ mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n",
+ link_speed, pci_bw);
+
/* SQ */
params->log_sq_size = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
@@ -3836,13 +3848,9 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
/* set CQE compression */
params->rx_cqe_compress_def = false;
if (MLX5_CAP_GEN(mdev, cqe_compression) &&
- MLX5_CAP_GEN(mdev, vport_group_manager)) {
- mlx5e_get_max_linkspeed(mdev, &link_speed);
- mlx5e_get_pci_bw(mdev, &pci_bw);
- mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n",
- link_speed, pci_bw);
+ MLX5_CAP_GEN(mdev, vport_group_manager))
params->rx_cqe_compress_def = cqe_compress_heuristic(link_speed, pci_bw);
- }
+
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
/* RQ */
@@ -3851,7 +3859,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
/* HW LRO */
/* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */
if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
- params->lro_en = true;
+ params->lro_en = hw_lro_heuristic(link_speed, pci_bw);
params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
/* CQ moderation params */
@@ -4123,48 +4131,10 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
return 0;
}
-static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev)
-{
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- int total_vfs = MLX5_TOTAL_VPORTS(mdev);
- int vport;
- u8 mac[ETH_ALEN];
-
- if (!MLX5_CAP_GEN(mdev, vport_group_manager))
- return;
-
- mlx5_query_nic_vport_mac_address(mdev, 0, mac);
-
- for (vport = 1; vport < total_vfs; vport++) {
- struct mlx5_eswitch_rep rep;
-
- rep.load = mlx5e_vport_rep_load;
- rep.unload = mlx5e_vport_rep_unload;
- rep.vport = vport;
- ether_addr_copy(rep.hw_id, mac);
- mlx5_eswitch_register_vport_rep(esw, vport, &rep);
- }
-}
-
-static void mlx5e_unregister_vport_rep(struct mlx5_core_dev *mdev)
-{
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- int total_vfs = MLX5_TOTAL_VPORTS(mdev);
- int vport;
-
- if (!MLX5_CAP_GEN(mdev, vport_group_manager))
- return;
-
- for (vport = 1; vport < total_vfs; vport++)
- mlx5_eswitch_unregister_vport_rep(esw, vport);
-}
-
static void mlx5e_nic_enable(struct mlx5e_priv *priv)
{
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- struct mlx5_eswitch_rep rep;
u16 max_mtu;
mlx5e_init_l2_addr(priv);
@@ -4179,16 +4149,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
mlx5e_enable_async_events(priv);
- if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
- mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id);
- rep.load = mlx5e_nic_rep_load;
- rep.unload = mlx5e_nic_rep_unload;
- rep.vport = FDB_UPLINK_VPORT;
- rep.netdev = netdev;
- mlx5_eswitch_register_vport_rep(esw, 0, &rep);
- }
-
- mlx5e_register_vport_rep(mdev);
+ if (MLX5_CAP_GEN(mdev, vport_group_manager))
+ mlx5e_register_vport_reps(priv);
if (netdev->reg_state != NETREG_REGISTERED)
return;
@@ -4212,7 +4174,6 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
static void mlx5e_nic_disable(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
rtnl_lock();
if (netif_running(priv->netdev))
@@ -4221,9 +4182,10 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
rtnl_unlock();
queue_work(priv->wq, &priv->set_rx_mode_work);
- mlx5e_unregister_vport_rep(mdev);
+
if (MLX5_CAP_GEN(mdev, vport_group_manager))
- mlx5_eswitch_unregister_vport_rep(esw, 0);
+ mlx5e_unregister_vport_reps(priv);
+
mlx5e_disable_async_events(priv);
mlx5_lag_remove(mdev);
}
@@ -4394,7 +4356,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
{
struct mlx5_eswitch *esw = mdev->priv.eswitch;
int total_vfs = MLX5_TOTAL_VPORTS(mdev);
- void *ppriv = NULL;
+ struct mlx5e_rep_priv *rpriv = NULL;
void *priv;
int vport;
int err;
@@ -4404,10 +4366,17 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
if (err)
return NULL;
- if (MLX5_CAP_GEN(mdev, vport_group_manager))
- ppriv = &esw->offloads.vport_reps[0];
+ if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
+ rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+ if (!rpriv) {
+ mlx5_core_warn(mdev,
+ "Not creating net device, Failed to alloc rep priv data\n");
+ return NULL;
+ }
+ rpriv->rep = &esw->offloads.vport_reps[0];
+ }
- netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv);
+ netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv);
if (!netdev) {
mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
goto err_unregister_reps;
@@ -4439,16 +4408,19 @@ err_unregister_reps:
for (vport = 1; vport < total_vfs; vport++)
mlx5_eswitch_unregister_vport_rep(esw, vport);
+ kfree(rpriv);
return NULL;
}
static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
{
struct mlx5e_priv *priv = vpriv;
+ void *ppriv = priv->ppriv;
unregister_netdev(priv->netdev);
mlx5e_detach(mdev, vpriv);
mlx5e_destroy_netdev(priv);
+ kfree(ppriv);
}
static void *mlx5e_get_netdev(void *vpriv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 16b683e8226d..79462c0368a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -34,10 +34,14 @@
#include <linux/mlx5/fs.h>
#include <net/switchdev.h>
#include <net/pkt_cls.h>
+#include <net/netevent.h>
+#include <net/arp.h>
#include "eswitch.h"
#include "en.h"
+#include "en_rep.h"
#include "en_tc.h"
+#include "fs_core.h"
static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
@@ -75,7 +79,8 @@ static void mlx5e_rep_get_strings(struct net_device *dev,
static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct rtnl_link_stats64 *vport_stats;
struct ifla_vf_stats vf_stats;
int err;
@@ -165,7 +170,8 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
if (esw->mode == SRIOV_NONE)
@@ -184,10 +190,10 @@ int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
}
int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
-
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5e_channel *c;
int n, tc, num_sqs = 0;
int err = -ENOMEM;
@@ -212,42 +218,398 @@ out:
return err;
}
-int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
+void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
{
- struct net_device *netdev = rep->netdev;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ mlx5_eswitch_sqs2vport_stop(esw, rep);
+}
+
+static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ unsigned long ipv6_interval = NEIGH_VAR(&ipv6_stub->nd_tbl->parms,
+ DELAY_PROBE_TIME);
+#else
+ unsigned long ipv6_interval = ~0UL;
+#endif
+ unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms,
+ DELAY_PROBE_TIME);
+ struct net_device *netdev = rpriv->rep->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
- if (test_bit(MLX5E_STATE_OPENED, &priv->state))
- return mlx5e_add_sqs_fwd_rules(priv);
- return 0;
+ rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
}
-void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- mlx5_eswitch_sqs2vport_stop(esw, rep);
+ mlx5_fc_queue_stats_work(priv->mdev,
+ &neigh_update->neigh_stats_work,
+ neigh_update->min_interval);
}
-void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
- struct mlx5_eswitch_rep *rep)
+static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
{
- struct net_device *netdev = rep->netdev;
+ struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
+ neigh_update.neigh_stats_work.work);
+ struct net_device *netdev = rpriv->rep->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe;
- if (test_bit(MLX5E_STATE_OPENED, &priv->state))
- mlx5e_remove_sqs_fwd_rules(priv);
+ rtnl_lock();
+ if (!list_empty(&rpriv->neigh_update.neigh_list))
+ mlx5e_rep_queue_neigh_stats_work(priv);
- /* clean (and re-init) existing uplink offloaded TC rules */
- mlx5e_tc_cleanup(priv);
- mlx5e_tc_init(priv);
+ list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list)
+ mlx5e_tc_update_neigh_used_value(nhe);
+
+ rtnl_unlock();
+}
+
+static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
+{
+ refcount_inc(&nhe->refcnt);
+}
+
+static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
+{
+ if (refcount_dec_and_test(&nhe->refcnt))
+ kfree(nhe);
+}
+
+static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN])
+{
+ struct ethhdr *eth = (struct ethhdr *)e->encap_header;
+
+ ASSERT_RTNL();
+
+ if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) ||
+ !ether_addr_equal(e->h_dest, ha))
+ mlx5e_tc_encap_flows_del(priv, e);
+
+ if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ ether_addr_copy(e->h_dest, ha);
+ ether_addr_copy(eth->h_dest, ha);
+
+ mlx5e_tc_encap_flows_add(priv, e);
+ }
+}
+
+static void mlx5e_rep_neigh_update(struct work_struct *work)
+{
+ struct mlx5e_neigh_hash_entry *nhe =
+ container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
+ struct neighbour *n = nhe->n;
+ struct mlx5e_encap_entry *e;
+ unsigned char ha[ETH_ALEN];
+ struct mlx5e_priv *priv;
+ bool neigh_connected;
+ bool encap_connected;
+ u8 nud_state, dead;
+
+ rtnl_lock();
+
+ /* If these parameters are changed after we release the lock,
+ * we'll receive another event letting us know about it.
+ * We use this lock to avoid inconsistency between the neigh validity
+ * and its hw address.
+ */
+ read_lock_bh(&n->lock);
+ memcpy(ha, n->ha, ETH_ALEN);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ read_unlock_bh(&n->lock);
+
+ neigh_connected = (nud_state & NUD_VALID) && !dead;
+
+ list_for_each_entry(e, &nhe->encap_list, encap_list) {
+ encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+ priv = netdev_priv(e->out_dev);
+
+ if (encap_connected != neigh_connected ||
+ !ether_addr_equal(e->h_dest, ha))
+ mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+ }
+ mlx5e_rep_neigh_entry_release(nhe);
+ rtnl_unlock();
+ neigh_release(n);
+}
+
+static struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh);
+
+static int mlx5e_rep_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ neigh_update.netevent_nb);
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct net_device *netdev = rpriv->rep->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+ struct mlx5e_neigh m_neigh = {};
+ struct neigh_parms *p;
+ struct neighbour *n;
+ bool found = false;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ n = ptr;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
+#else
+ if (n->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ m_neigh.dev = n->dev;
+ m_neigh.family = n->ops->family;
+ memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+
+ /* We are in atomic context and can't take RTNL mutex, so use
+ * spin_lock_bh to lookup the neigh table. bh is used since
+ * netevent can be called from a softirq context.
+ */
+ spin_lock_bh(&neigh_update->encap_lock);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
+ if (!nhe) {
+ spin_unlock_bh(&neigh_update->encap_lock);
+ return NOTIFY_DONE;
+ }
+
+ /* This assignment is valid as long as the neigh reference
+ * is taken
+ */
+ nhe->n = n;
+
+ /* Take a reference to ensure the neighbour and mlx5 encap
+ * entry won't be destructed until we drop the reference in
+ * delayed work.
+ */
+ neigh_hold(n);
+ mlx5e_rep_neigh_entry_hold(nhe);
+
+ if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
+ mlx5e_rep_neigh_entry_release(nhe);
+ neigh_release(n);
+ }
+ spin_unlock_bh(&neigh_update->encap_lock);
+ break;
+
+ case NETEVENT_DELAY_PROBE_TIME_UPDATE:
+ p = ptr;
+
+ /* We check the device is present since we don't care about
+ * changes in the default table, we only care about changes
+ * done per device delay probe time parameter.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
+#else
+ if (!p->dev || p->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ /* We are in atomic context and can't take RTNL mutex,
+ * so use spin_lock_bh to walk the neigh list and look for
+ * the relevant device. bh is used since netevent can be
+ * called from a softirq context.
+ */
+ spin_lock_bh(&neigh_update->encap_lock);
+ list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) {
+ if (p->dev == nhe->m_neigh.dev) {
+ found = true;
+ break;
+ }
+ }
+ spin_unlock_bh(&neigh_update->encap_lock);
+ if (!found)
+ return NOTIFY_DONE;
+
+ neigh_update->min_interval = min_t(unsigned long,
+ NEIGH_VAR(p, DELAY_PROBE_TIME),
+ neigh_update->min_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev,
+ neigh_update->min_interval);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static const struct rhashtable_params mlx5e_neigh_ht_params = {
+ .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
+ .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
+ .key_len = sizeof(struct mlx5e_neigh),
+ .automatic_shrinking = true,
+};
+
+static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ int err;
+
+ err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ INIT_LIST_HEAD(&neigh_update->neigh_list);
+ spin_lock_init(&neigh_update->encap_lock);
+ INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
+ mlx5e_rep_neigh_stats_work);
+ mlx5e_rep_neigh_update_init_interval(rpriv);
+
+ rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
+ err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
+ if (err)
+ goto out_err;
+ return 0;
+
+out_err:
+ rhashtable_destroy(&neigh_update->neigh_ht);
+ return err;
+}
+
+static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_priv *priv = netdev_priv(rpriv->rep->netdev);
+
+ unregister_netevent_notifier(&neigh_update->netevent_nb);
+
+ flush_workqueue(priv->wq); /* flush neigh update works */
+
+ cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
+
+ rhashtable_destroy(&neigh_update->neigh_ht);
+}
+
+static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int err;
+
+ err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
+
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ spin_lock_bh(&rpriv->neigh_update.encap_lock);
+
+ list_del(&nhe->neigh_list);
+
+ rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ spin_unlock_bh(&rpriv->neigh_update.encap_lock);
+}
+
+/* This function must only be called under RTNL lock or under the
+ * representor's encap_lock in case RTNL mutex can't be held.
+ */
+static struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+
+ return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
+ mlx5e_neigh_ht_params);
+}
+
+static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh_hash_entry **nhe)
+{
+ int err;
+
+ *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
+ if (!*nhe)
+ return -ENOMEM;
+
+ memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
+ INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
+ INIT_LIST_HEAD(&(*nhe)->encap_list);
+ refcount_set(&(*nhe)->refcnt, 1);
+
+ err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
+ if (err)
+ goto out_free;
+ return 0;
+
+out_free:
+ kfree(*nhe);
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ /* The neigh hash entry must be removed from the hash table regardless
+ * of the reference count value, so it won't be found by the next
+ * neigh notification call. The neigh hash entry reference count is
+ * incremented only during creation and neigh notification calls and
+ * protects from freeing the nhe struct.
+ */
+ mlx5e_rep_neigh_entry_remove(priv, nhe);
+ mlx5e_rep_neigh_entry_release(nhe);
+}
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_neigh_hash_entry *nhe;
+ int err;
+
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+ if (!nhe) {
+ err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
+ if (err)
+ return err;
+ }
+ list_add(&e->encap_list, &nhe->encap_list);
+ return 0;
+}
+
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_neigh_hash_entry *nhe;
+
+ list_del(&e->encap_list);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+
+ if (list_empty(&nhe->encap_list))
+ mlx5e_rep_neigh_entry_destroy(priv, nhe);
}
static int mlx5e_rep_open(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
int err;
@@ -265,7 +627,8 @@ static int mlx5e_rep_open(struct net_device *dev)
static int mlx5e_rep_close(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
(void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
@@ -277,7 +640,8 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
char *buf, size_t len)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
int ret;
ret = snprintf(buf, len, "%d", rep->vport - 1);
@@ -320,10 +684,16 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
{
- struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep;
+
+ if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
+ return false;
- if (rep && rep->vport == FDB_UPLINK_VPORT && esw->mode == SRIOV_OFFLOADS)
+ rep = rpriv->rep;
+ if (esw->mode == SRIOV_OFFLOADS &&
+ rep && rep->vport == FDB_UPLINK_VPORT)
return true;
return false;
@@ -331,7 +701,8 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
{
- struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
if (rep && rep->vport != FDB_UPLINK_VPORT)
return true;
@@ -464,7 +835,8 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_flow_handle *flow_rule;
int err;
@@ -504,7 +876,8 @@ err_destroy_direct_rqts:
static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
{
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
mlx5e_tc_cleanup(priv);
mlx5_del_flow_rules(rep->vport_rx_rule);
@@ -543,20 +916,70 @@ static struct mlx5e_profile mlx5e_rep_profile = {
.max_tc = 1,
};
-int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
- struct mlx5_eswitch_rep *rep)
+/* e-Switch vport representors */
+
+static int
+mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
{
+ struct mlx5e_priv *priv = netdev_priv(rep->netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ int err;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_add_sqs_fwd_rules(priv);
+ if (err)
+ return err;
+ }
+
+ err = mlx5e_rep_neigh_init(rpriv);
+ if (err)
+ goto err_remove_sqs;
+
+ return 0;
+
+err_remove_sqs:
+ mlx5e_remove_sqs_fwd_rules(priv);
+ return err;
+}
+
+static void
+mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_priv *priv = netdev_priv(rep->netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_remove_sqs_fwd_rules(priv);
+
+ /* clean (and re-init) existing uplink offloaded TC rules */
+ mlx5e_tc_cleanup(priv);
+ mlx5e_tc_init(priv);
+
+ mlx5e_rep_neigh_cleanup(rpriv);
+}
+
+static int
+mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
struct net_device *netdev;
int err;
- netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
+ rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+ if (!rpriv)
+ return -ENOMEM;
+
+ netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rpriv);
if (!netdev) {
pr_warn("Failed to create representor netdev for vport %d\n",
rep->vport);
+ kfree(rpriv);
return -EINVAL;
}
rep->netdev = netdev;
+ rpriv->rep = rep;
err = mlx5e_attach_netdev(netdev_priv(netdev));
if (err) {
@@ -565,31 +988,104 @@ int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
goto err_destroy_netdev;
}
+ err = mlx5e_rep_neigh_init(rpriv);
+ if (err) {
+ pr_warn("Failed to initialized neighbours handling for vport %d\n",
+ rep->vport);
+ goto err_detach_netdev;
+ }
+
err = register_netdev(netdev);
if (err) {
pr_warn("Failed to register representor netdev for vport %d\n",
rep->vport);
- goto err_detach_netdev;
+ goto err_neigh_cleanup;
}
return 0;
+err_neigh_cleanup:
+ mlx5e_rep_neigh_cleanup(rpriv);
+
err_detach_netdev:
mlx5e_detach_netdev(netdev_priv(netdev));
err_destroy_netdev:
mlx5e_destroy_netdev(netdev_priv(netdev));
-
+ kfree(rpriv);
return err;
}
-void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
- struct mlx5_eswitch_rep *rep)
+static void
+mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
{
struct net_device *netdev = rep->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ void *ppriv = priv->ppriv;
- unregister_netdev(netdev);
- mlx5e_detach_netdev(netdev_priv(netdev));
- mlx5e_destroy_netdev(netdev_priv(netdev));
+ unregister_netdev(rep->netdev);
+
+ mlx5e_rep_neigh_cleanup(rpriv);
+ mlx5e_detach_netdev(priv);
+ mlx5e_destroy_netdev(priv);
+ kfree(ppriv); /* mlx5e_rep_priv */
+}
+
+static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(mdev);
+ int vport;
+ u8 mac[ETH_ALEN];
+
+ mlx5_query_nic_vport_mac_address(mdev, 0, mac);
+
+ for (vport = 1; vport < total_vfs; vport++) {
+ struct mlx5_eswitch_rep rep;
+
+ rep.load = mlx5e_vport_rep_load;
+ rep.unload = mlx5e_vport_rep_unload;
+ rep.vport = vport;
+ ether_addr_copy(rep.hw_id, mac);
+ mlx5_eswitch_register_vport_rep(esw, vport, &rep);
+ }
+}
+
+static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vpo