summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMaxim Mikityanskiy <maximmi@mellanox.com>2019-06-26 17:35:38 +0300
committerDaniel Borkmann <daniel@iogearbox.net>2019-06-27 22:53:28 +0200
commitdb05815b36cbd486c86fd002dfa81c9af6245e25 (patch)
tree314ae8af0962e283fc111760f0c373aff1e0352e
parent32a23653970ac2284e41258623448a5d0852f402 (diff)
net/mlx5e: Add XSK zero-copy support
This commit adds support for AF_XDP zero-copy RX and TX. We create a dedicated XSK RQ inside the channel, it means that two RQs are running simultaneously: one for non-XSK traffic and the other for XSK traffic. The regular and XSK RQs use a single ID namespace split into two halves: the lower half is regular RQs, and the upper half is XSK RQs. When any zero-copy AF_XDP socket is active, changing the number of channels is not allowed, because it would break to mapping between XSK RQ IDs and channels. XSK requires different page allocation and release routines. Such functions as mlx5e_{alloc,free}_rx_mpwqe and mlx5e_{get,put}_rx_frag are generic enough to be used for both regular and XSK RQs, and they use the mlx5e_page_{alloc,release} wrappers around the real allocation functions. Function pointers are not used to avoid losing the performance with retpolines. Wherever it's certain that the regular (non-XSK) page release function should be used, it's called directly. Only the stats that could be meaningful for XSK are exposed to the userspace. Those that don't take part in the XSK flow are not considered. Note that we don't wait for WQEs on the XSK RQ (unlike the regular RQ), because the newer xdpsock sample doesn't provide any Fill Ring entries at the setup stage. We create a dedicated XSK SQ in the channel. This separation has its advantages: 1. When the UMEM is closed, the XSK SQ can also be closed and stop receiving completions. If an existing SQ was used for XSK, it would continue receiving completions for the packets of the closed socket. If a new UMEM was opened at that point, it would start getting completions that don't belong to it. 2. Calculating statistics separately. When the userspace kicks the TX, the driver triggers a hardware interrupt by posting a NOP to a dedicated XSK ICO (internal control operations) SQ, in order to trigger NAPI on the right CPU core. This XSK ICO SQ is protected by a spinlock, as the userspace application may kick the TX from any core. Store the pointers to the UMEMs in the net device private context, independently from the kernel. This way the driver can distinguish between the zero-copy and non-zero-copy UMEMs. The kernel function xdp_get_umem_from_qid does not care about this difference, but the driver is only interested in zero-copy UMEMs, particularly, on the cleanup it determines whether to close the XSK RQ and SQ or not by looking at the presence of the UMEM. Use state_lock to protect the access to this area of UMEM pointers. LRO isn't compatible with XDP, but there may be active UMEMs while XDP is off. If this is the case, don't allow LRO to ensure XDP can be reenabled at any time. The validation of XSK parameters typically happens when XSK queues open. However, when the interface is down or the XDP program isn't set, it's still possible to have active AF_XDP sockets and even to open new, but the XSK queues will be closed. To cover these cases, perform the validation also in these flows: 1. A new UMEM is registered, but the XSK queues aren't going to be created due to missing XDP program or interface being down. 2. MTU changes while there are UMEMs registered. Having this early check prevents mlx5e_open_channels from failing at a later stage, where recovery is impossible and the application has no chance to handle the error, because it got the successful return value for an MTU change or XSK open operation. The performance testing was performed on a machine with the following configuration: - 24 cores of Intel Xeon E5-2620 v3 @ 2.40 GHz - Mellanox ConnectX-5 Ex with 100 Gbit/s link The results with retpoline disabled, single stream: txonly: 33.3 Mpps (21.5 Mpps with queue and app pinned to the same CPU) rxdrop: 12.2 Mpps l2fwd: 9.4 Mpps The results with retpoline enabled, single stream: txonly: 21.3 Mpps (14.1 Mpps with queue and app pinned to the same CPU) rxdrop: 9.9 Mpps l2fwd: 6.8 Mpps Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com> Signed-off-by: Tariq Toukan <tariqt@mellanox.com> Acked-by: Saeed Mahameed <saeedm@mellanox.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h77
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c103
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c192
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c223
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c111
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c267
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c527
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c77
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c115
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c34
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c14
24 files changed, 1840 insertions, 255 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 5fe2bf916c06..90db891fcc22 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -24,7 +24,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o \
- en/params.o
+ en/params.o en/xsk/umem.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o
#
# Netdev extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 010140753e73..cb00e622c006 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -137,6 +137,7 @@ struct page_pool;
#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1)
#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
#define MLX5E_TX_CQ_POLL_BUDGET 128
+#define MLX5E_TX_XSK_POLL_BUDGET 64
#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */
#define MLX5E_UMR_WQE_INLINE_SZ \
@@ -155,6 +156,11 @@ do { \
##__VA_ARGS__); \
} while (0)
+enum mlx5e_rq_group {
+ MLX5E_RQ_GROUP_REGULAR,
+ MLX5E_RQ_GROUP_XSK,
+ MLX5E_NUM_RQ_GROUPS /* Keep last. */
+};
static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
{
@@ -179,7 +185,8 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
/* Use this function to get max num channels after netdev was created */
static inline int mlx5e_get_netdev_max_channels(struct net_device *netdev)
{
- return min_t(unsigned int, netdev->num_rx_queues,
+ return min_t(unsigned int,
+ netdev->num_rx_queues / MLX5E_NUM_RQ_GROUPS,
netdev->num_tx_queues);
}
@@ -250,6 +257,7 @@ struct mlx5e_params {
u32 lro_timeout;
u32 pflags;
struct bpf_prog *xdp_prog;
+ struct mlx5e_xsk *xsk;
unsigned int sw_mtu;
int hard_mtu;
};
@@ -399,8 +407,14 @@ struct mlx5e_txqsq {
} ____cacheline_aligned_in_smp;
struct mlx5e_dma_info {
- struct page *page;
- dma_addr_t addr;
+ dma_addr_t addr;
+ union {
+ struct page *page;
+ struct {
+ u64 handle;
+ void *data;
+ } xsk;
+ };
};
/* XDP packets can be transmitted in different ways. On completion, we need to
@@ -467,9 +481,11 @@ struct mlx5e_xdp_mpwqe {
};
struct mlx5e_xdpsq;
+typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
struct mlx5e_xdp_xmit_data *,
- struct mlx5e_xdp_info *);
+ struct mlx5e_xdp_info *,
+ int);
struct mlx5e_xdpsq {
/* data path */
@@ -487,8 +503,10 @@ struct mlx5e_xdpsq {
struct mlx5e_cq cq;
/* read only */
+ struct xdp_umem *umem;
struct mlx5_wq_cyc wq;
struct mlx5e_xdpsq_stats *stats;
+ mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check;
mlx5e_fp_xmit_xdp_frame xmit_xdp_frame;
struct {
struct mlx5e_xdp_wqe_info *wqe_info;
@@ -619,6 +637,7 @@ struct mlx5e_rq {
} mpwqe;
};
struct {
+ u16 umem_headroom;
u16 headroom;
u8 map_dir; /* dma map direction */
} buff;
@@ -649,6 +668,10 @@ struct mlx5e_rq {
DECLARE_BITMAP(flags, 8);
struct page_pool *page_pool;
+ /* AF_XDP zero-copy */
+ struct zero_copy_allocator zca;
+ struct xdp_umem *umem;
+
/* control */
struct mlx5_wq_ctrl wq_ctrl;
__be32 mkey_be;
@@ -661,6 +684,11 @@ struct mlx5e_rq {
struct xdp_rxq_info xdp_rxq;
} ____cacheline_aligned_in_smp;
+enum mlx5e_channel_state {
+ MLX5E_CHANNEL_STATE_XSK,
+ MLX5E_CHANNEL_NUM_STATES
+};
+
struct mlx5e_channel {
/* data path */
struct mlx5e_rq rq;
@@ -677,6 +705,13 @@ struct mlx5e_channel {
/* XDP_REDIRECT */
struct mlx5e_xdpsq xdpsq;
+ /* AF_XDP zero-copy */
+ struct mlx5e_rq xskrq;
+ struct mlx5e_xdpsq xsksq;
+ struct mlx5e_icosq xskicosq;
+ /* xskicosq can be accessed from any CPU - the spinlock protects it. */
+ spinlock_t xskicosq_lock;
+
/* data path - accessed per napi poll */
struct irq_desc *irq_desc;
struct mlx5e_ch_stats *stats;
@@ -685,6 +720,7 @@ struct mlx5e_channel {
struct mlx5e_priv *priv;
struct mlx5_core_dev *mdev;
struct hwtstamp_config *tstamp;
+ DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
int ix;
int cpu;
cpumask_var_t xps_cpumask;
@@ -700,14 +736,17 @@ struct mlx5e_channel_stats {
struct mlx5e_ch_stats ch;
struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
struct mlx5e_rq_stats rq;
+ struct mlx5e_rq_stats xskrq;
struct mlx5e_xdpsq_stats rq_xdpsq;
struct mlx5e_xdpsq_stats xdpsq;
+ struct mlx5e_xdpsq_stats xsksq;
} ____cacheline_aligned_in_smp;
enum {
MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING,
MLX5E_STATE_XDP_TX_ENABLED,
+ MLX5E_STATE_XDP_OPEN,
};
struct mlx5e_rqt {
@@ -740,6 +779,17 @@ struct mlx5e_modify_sq_param {
int rl_index;
};
+struct mlx5e_xsk {
+ /* UMEMs are stored separately from channels, because we don't want to
+ * lose them when channels are recreated. The kernel also stores UMEMs,
+ * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs,
+ * so rely on our mechanism.
+ */
+ struct xdp_umem **umems;
+ u16 refcnt;
+ bool ever_used;
+};
+
struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
@@ -760,6 +810,7 @@ struct mlx5e_priv {
struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS];
+ struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS];
struct mlx5e_rss_params rss_params;
u32 tx_rates[MLX5E_MAX_NUM_SQS];
@@ -796,6 +847,7 @@ struct mlx5e_priv {
struct mlx5e_tls *tls;
#endif
struct devlink_health_reporter *tx_reporter;
+ struct mlx5e_xsk xsk;
};
struct mlx5e_profile {
@@ -839,8 +891,9 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
-void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
- bool recycle);
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info,
+ bool recycle);
void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
@@ -900,6 +953,30 @@ void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen);
struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt);
+struct mlx5e_xsk_param;
+
+struct mlx5e_rq_param;
+int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
+ struct xdp_umem *umem, struct mlx5e_rq *rq);
+int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
+void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
+void mlx5e_close_rq(struct mlx5e_rq *rq);
+
+struct mlx5e_sq_param;
+int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct mlx5e_icosq *sq);
+void mlx5e_close_icosq(struct mlx5e_icosq *sq);
+int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct xdp_umem *umem,
+ struct mlx5e_xdpsq *sq, bool is_redirect);
+void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq);
+
+struct mlx5e_cq_param;
+int mlx5e_open_cq(struct mlx5e_channel *c, struct net_dim_cq_moder moder,
+ struct mlx5e_cq_param *param, struct mlx5e_cq *cq);
+void mlx5e_close_cq(struct mlx5e_cq *cq);
+
int mlx5e_open_locked(struct net_device *netdev);
int mlx5e_close_locked(struct net_device *netdev);
@@ -1070,10 +1147,10 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv);
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
@@ -1142,6 +1219,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_xsk *xsk,
struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
u16 max_channels, u16 mtu);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 0de908b12fcc..79301d116667 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -49,48 +49,56 @@ u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
return frag_sz;
}
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
+u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, NULL);
+ u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
}
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params)
+bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params, NULL);
+ /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more
+ * than one page. For this, check both with and without xsk.
+ */
+ u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
+ mlx5e_rx_get_linear_frag_sz(params, NULL));
- return !params->lro_en && frag_sz <= PAGE_SIZE;
+ return !params->lro_en && linear_frag_sz <= PAGE_SIZE;
}
#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params, NULL);
+ u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
s8 signed_log_num_strides_param;
u8 log_num_strides;
- if (!mlx5e_rx_is_linear_skb(params))
+ if (!mlx5e_rx_is_linear_skb(params, xsk))
return false;
- if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
+ if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
return false;
if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
return true;
- log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz);
+ log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
signed_log_num_strides_param =
(s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
return signed_log_num_strides_param >= 0;
}
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params);
+ u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk);
/* Numbers are unsigned, don't subtract to avoid underflow. */
if (params->log_rq_mtu_frames <
@@ -101,27 +109,30 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
}
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
- return order_base_2(mlx5e_rx_get_linear_frag_sz(params, NULL));
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk));
return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
return MLX5_MPWRQ_LOG_WQE_SZ -
- mlx5e_mpwqe_get_log_stride_size(mdev, params);
+ mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
}
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
- mlx5e_rx_is_linear_skb(params) :
- mlx5e_rx_mpwqe_is_linear_skb(mdev, params);
+ mlx5e_rx_is_linear_skb(params, xsk) :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
- return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, NULL) : 0;
+ return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index f83417b822bf..bd882b5ee9a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -40,22 +40,85 @@ struct mlx5e_channel_param {
struct mlx5e_cq_param icosq_cq;
};
+static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params,
+ u16 qid,
+ enum mlx5e_rq_group group,
+ u16 *ix)
+{
+ int nch = params->num_channels;
+ int ch = qid - nch * group;
+
+ if (ch < 0 || ch >= nch)
+ return false;
+
+ *ix = ch;
+ return true;
+}
+
+static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params,
+ u16 qid,
+ u16 *ix,
+ enum mlx5e_rq_group *group)
+{
+ u16 nch = params->num_channels;
+
+ *ix = qid % nch;
+ *group = qid / nch;
+}
+
+static inline bool mlx5e_qid_validate(struct mlx5e_params *params, u64 qid)
+{
+ return qid < params->num_channels * MLX5E_NUM_RQ_GROUPS;
+}
+
/* Parameter calculations */
u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params);
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params);
+u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+
+/* Build queue parameters */
+
+void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_param *param);
+void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param);
#endif /* __MLX5_EN_PARAMS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index ee99efde9143..b0b982cf69bb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -31,6 +31,7 @@
*/
#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
#include "en/xdp.h"
#include "en/params.h"
@@ -113,12 +114,12 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
xdpi.page.di = *di;
}
- return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi);
+ return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0);
}
/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len)
+ void *va, u16 *rx_headroom, u32 *len, bool xsk)
{
struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
struct xdp_buff xdp;
@@ -132,9 +133,13 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + *len;
xdp.data_hard_start = va;
+ if (xsk)
+ xdp.handle = di->xsk.handle;
xdp.rxq = &rq->xdp_rxq;
act = bpf_prog_run_xdp(prog, &xdp);
+ if (xsk)
+ xdp.handle += xdp.data - xdp.data_hard_start;
switch (act) {
case XDP_PASS:
*rx_headroom = xdp.data - xdp.data_hard_start;
@@ -152,7 +157,8 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
- mlx5e_page_dma_unmap(rq, di);
+ if (!xsk)
+ mlx5e_page_dma_unmap(rq, di);
rq->stats->xdp_redirect++;
return true;
default:
@@ -206,7 +212,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
stats->mpwqe++;
}
-static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
@@ -229,9 +235,32 @@ static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
session->wqe = NULL; /* Close session */
}
+enum {
+ MLX5E_XDP_CHECK_OK = 1,
+ MLX5E_XDP_CHECK_START_MPWQE = 2,
+};
+
+static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
+{
+ if (unlikely(!sq->mpwqe.wqe)) {
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+ MLX5_SEND_WQE_MAX_WQEBBS))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->stats->full++;
+ return -EBUSY;
+ }
+
+ return MLX5E_XDP_CHECK_START_MPWQE;
+ }
+
+ return MLX5E_XDP_CHECK_OK;
+}
+
static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_xmit_data *xdptxd,
- struct mlx5e_xdp_info *xdpi)
+ struct mlx5e_xdp_info *xdpi,
+ int check_result)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
@@ -241,15 +270,16 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
return false;
}
- if (unlikely(!session->wqe)) {
- if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- MLX5_SEND_WQE_MAX_WQEBBS))) {
- /* SQ is full, ring doorbell */
- mlx5e_xmit_xdp_doorbell(sq);
- stats->full++;
- return false;
- }
+ if (!check_result)
+ check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
+ if (unlikely(check_result < 0))
+ return false;
+ if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
+ /* Start the session when nothing can fail, so it's guaranteed
+ * that if there is an active session, it has at least one dseg,
+ * and it's safe to complete it at any time.
+ */
mlx5e_xdp_mpwqe_session_start(sq);
}
@@ -264,9 +294,22 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
return true;
}
+static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+{
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->stats->full++;
+ return -EBUSY;
+ }
+
+ return MLX5E_XDP_CHECK_OK;
+}
+
static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_xmit_data *xdptxd,
- struct mlx5e_xdp_info *xdpi)
+ struct mlx5e_xdp_info *xdpi,
+ int check_result)
{
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -288,12 +331,10 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
return false;
}
- if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
- /* SQ is full, ring doorbell */
- mlx5e_xmit_xdp_doorbell(sq);
- stats->full++;
+ if (!check_result)
+ check_result = mlx5e_xmit_xdp_frame_check(sq);
+ if (unlikely(check_result < 0))
return false;
- }
cseg->fm_ce_se = 0;
@@ -323,6 +364,7 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_wqe_info *wi,
+ u32 *xsk_frames,
bool recycle)
{
struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
@@ -340,7 +382,11 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
break;
case MLX5E_XDP_XMIT_MODE_PAGE:
/* XDP_TX from the regular RQ */
- mlx5e_page_release(xdpi.page.rq, &xdpi.page.di, recycle);
+ mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle);
+ break;
+ case MLX5E_XDP_XMIT_MODE_XSK:
+ /* AF_XDP send */
+ (*xsk_frames)++;
break;
default:
WARN_ON_ONCE(true);
@@ -352,6 +398,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
struct mlx5e_xdpsq *sq;
struct mlx5_cqe64 *cqe;
+ u32 xsk_frames = 0;
u16 sqcc;
int i;
@@ -393,10 +440,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
sqcc += wi->num_wqebbs;
- mlx5e_free_xdpsq_desc(sq, wi, true);
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
} while (!last_wqe);
} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+ if (xsk_frames)
+ xsk_umem_complete_tx(sq->umem, xsk_frames);
+
sq->stats->cqes += i;
mlx5_cqwq_update_db_record(&cq->wq);
@@ -410,6 +460,8 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
+ u32 xsk_frames = 0;
+
while (sq->cc != sq->pc) {
struct mlx5e_xdp_wqe_info *wi;
u16 ci;
@@ -419,8 +471,11 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
sq->cc += wi->num_wqebbs;
- mlx5e_free_xdpsq_desc(sq, wi, false);
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false);
}
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(sq->umem, xsk_frames);
}
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -466,7 +521,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
xdpi.frame.xdpf = xdpf;
xdpi.frame.dma_addr = xdptxd.dma_addr;
- if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi))) {
+ if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0))) {
dma_unmap_single(sq->pdev, xdptxd.dma_addr,
xdptxd.len, DMA_TO_DEVICE);
xdp_return_frame_rx_napi(xdpf);
@@ -500,6 +555,8 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
+ sq->xmit_xdp_frame_check = is_mpw ?
+ mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
sq->xmit_xdp_frame = is_mpw ?
mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 9200cb9f499b..2d934c8d3807 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -42,7 +42,8 @@
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len);
+ void *va, u16 *rx_headroom, u32 *len, bool xsk);
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
@@ -67,6 +68,21 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
}
+static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
+static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv)
+{
+ clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
+static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv)
+{
+ return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
{
if (sq->doorbell_cseg) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile
new file mode 100644
index 000000000000..5ee42991900a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/../..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
new file mode 100644
index 000000000000..6a55573ec8f2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "rx.h"
+#include "en/xdp.h"
+#include <net/xdp_sock.h>
+
+/* RX data path */
+
+bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count)
+{