author    David S. Miller <davem@davemloft.net>  2020-09-14 14:07:58 -0700
committer David S. Miller <davem@davemloft.net>  2020-09-14 14:07:58 -0700
commit    7952d7edf30beedbfa67414c31eab18584710fba (patch)
tree      566c50f90eb8e2aed30f08a6314c67cbfadf3750
parent    e0d9ae699e3ab5b907b9bc826ea37dba50d29dbd (diff)
parent    8cbf74149903382cf422c653d4ee8fb2f44e3ec8 (diff)
Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue
Tony Nguyen says:

====================
40GbE Intel Wired LAN Driver Updates 2020-09-14

This series contains updates to the i40e driver only.

Li RongQing removes the binding of the affinity mask to a fixed CPU and
makes the prefetch of the Rx buffer page conditional.

Björn provides AF_XDP performance improvements by not prefetching HW
descriptors, using 16 byte descriptors, and moving buffer allocation out
of the Rx processing loop.

v2: Define prefetch_page_address in a common header for patch 2. Dropped
previous patch 5, as it is being reworked to be more generalized.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e.h              |  2
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_debugfs.c      | 10
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_main.c         | 16
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_trace.h        |  6
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c         | 21
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.h         |  2
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx_common.h  | 13
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_type.h         |  5
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_xsk.c          | 24
-rw-r--r--  include/linux/prefetch.h                            |  8

10 files changed, 59 insertions(+), 48 deletions(-)
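The central change in Björn's patches is the switch from 32-byte to 16-byte Rx
descriptors (rx_ctx.dsize = 0 in the i40e_main.c hunk below, and the
i40e_rx_desc redefinition in i40e_txrx.h). The win is cache footprint: with
64-byte lines, four 16-byte descriptors share a cache line instead of two, and
the ring's descriptor memory halves. A standalone C sketch of the arithmetic,
not kernel code, using simplified stand-in types where only the sizes matter:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the i40e descriptor unions. */
struct rx_desc_16b { uint64_t qword[2]; };	/* pkt_addr + hdr_addr */
struct rx_desc_32b { uint64_t qword[4]; };	/* two extra reserved qwords */

int main(void)
{
	const size_t cacheline = 64;	/* typical x86-64 cache line */
	const size_t ring = 512;	/* a common default ring size */

	static_assert(sizeof(struct rx_desc_16b) == 16, "16B layout");
	static_assert(sizeof(struct rx_desc_32b) == 32, "32B layout");

	printf("descriptors per cache line: %zu vs %zu\n",
	       cacheline / sizeof(struct rx_desc_16b),	/* 4 */
	       cacheline / sizeof(struct rx_desc_32b));	/* 2 */
	printf("descriptor ring size: %zu vs %zu bytes\n",
	       ring * sizeof(struct rx_desc_16b),	/* 8192 */
	       ring * sizeof(struct rx_desc_32b));	/* 16384 */
	return 0;
}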
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index a7e212d1caa2..ada0e93c38f0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -90,7 +90,7 @@
#define I40E_OEM_RELEASE_MASK 0x0000ffff
#define I40E_RX_DESC(R, i) \
- (&(((union i40e_32byte_rx_desc *)((R)->desc))[i]))
+ (&(((union i40e_rx_desc *)((R)->desc))[i]))
#define I40E_TX_DESC(R, i) \
(&(((struct i40e_tx_desc *)((R)->desc))[i]))
#define I40E_TX_CTXTDESC(R, i) \
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index d3ad2e3aa838..d7c13ca9be7d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -604,10 +604,9 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n,
} else {
rxd = I40E_RX_DESC(ring, i);
dev_info(&pf->pdev->dev,
- " d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
+ " d[%03x] = 0x%016llx 0x%016llx\n",
i, rxd->read.pkt_addr,
- rxd->read.hdr_addr,
- rxd->read.rsvd1, rxd->read.rsvd2);
+ rxd->read.hdr_addr);
}
}
} else if (cnt == 3) {
@@ -625,10 +624,9 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n,
} else {
rxd = I40E_RX_DESC(ring, desc_n);
dev_info(&pf->pdev->dev,
- "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
+ "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n",
vsi_seid, ring_id, desc_n,
- rxd->read.pkt_addr, rxd->read.hdr_addr,
- rxd->read.rsvd1, rxd->read.rsvd2);
+ rxd->read.pkt_addr, rxd->read.hdr_addr);
}
} else {
dev_info(&pf->pdev->dev, "dump desc rx/tx/xdp <vsi_seid> <ring_id> [<desc_n>]\n");
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 05c6d3ea11e6..07207e21874f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3321,8 +3321,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
rx_ctx.base = (ring->dma / 128);
rx_ctx.qlen = ring->count;
- /* use 32 byte descriptors */
- rx_ctx.dsize = 1;
+ /* use 16 byte descriptors */
+ rx_ctx.dsize = 0;
/* descriptor type is always zero
* rx_ctx.dtype = 0;
@@ -11186,11 +11186,10 @@ static int i40e_init_msix(struct i40e_pf *pf)
* i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
* @vsi: the VSI being configured
* @v_idx: index of the vector in the vsi struct
- * @cpu: cpu to be used on affinity_mask
*
* We allocate one q_vector. If allocation fails we return -ENOMEM.
**/
-static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
+static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
{
struct i40e_q_vector *q_vector;
@@ -11223,7 +11222,7 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
{
struct i40e_pf *pf = vsi->back;
- int err, v_idx, num_q_vectors, current_cpu;
+ int err, v_idx, num_q_vectors;
/* if not MSIX, give the one vector only to the LAN VSI */
if (pf->flags & I40E_FLAG_MSIX_ENABLED)
@@ -11233,15 +11232,10 @@ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
else
return -EINVAL;
- current_cpu = cpumask_first(cpu_online_mask);
-
for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
- err = i40e_vsi_alloc_q_vector(vsi, v_idx, current_cpu);
+ err = i40e_vsi_alloc_q_vector(vsi, v_idx);
if (err)
goto err_out;
- current_cpu = cpumask_next(current_cpu, cpu_online_mask);
- if (unlikely(current_cpu >= nr_cpu_ids))
- current_cpu = cpumask_first(cpu_online_mask);
}
return 0;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h
index 424f02077e2e..983f8b98b275 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_trace.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h
@@ -112,7 +112,7 @@ DECLARE_EVENT_CLASS(
i40e_rx_template,
TP_PROTO(struct i40e_ring *ring,
- union i40e_32byte_rx_desc *desc,
+ union i40e_16byte_rx_desc *desc,
struct sk_buff *skb),
TP_ARGS(ring, desc, skb),
@@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS(
DEFINE_EVENT(
i40e_rx_template, i40e_clean_rx_irq,
TP_PROTO(struct i40e_ring *ring,
- union i40e_32byte_rx_desc *desc,
+ union i40e_16byte_rx_desc *desc,
struct sk_buff *skb),
TP_ARGS(ring, desc, skb));
@@ -148,7 +148,7 @@ DEFINE_EVENT(
DEFINE_EVENT(
i40e_rx_template, i40e_clean_rx_irq_rx,
TP_PROTO(struct i40e_ring *ring,
- union i40e_32byte_rx_desc *desc,
+ union i40e_16byte_rx_desc *desc,
struct sk_buff *skb),
TP_ARGS(ring, desc, skb));
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 91ab824926b9..1606ba5318f7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -533,11 +533,11 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw,
{
struct i40e_pf *pf = rx_ring->vsi->back;
struct pci_dev *pdev = pf->pdev;
- struct i40e_32b_rx_wb_qw0 *qw0;
+ struct i40e_16b_rx_wb_qw0 *qw0;
u32 fcnt_prog, fcnt_avail;
u32 error;
- qw0 = (struct i40e_32b_rx_wb_qw0 *)&qword0_raw;
+ qw0 = (struct i40e_16b_rx_wb_qw0 *)&qword0_raw;
error = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
@@ -1418,7 +1418,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
u64_stats_init(&rx_ring->syncp);
/* Round up to nearest 4K */
- rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
+ rx_ring->size = rx_ring->count * sizeof(union i40e_rx_desc);
rx_ring->size = ALIGN(rx_ring->size, 4096);
rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
&rx_ring->dma, GFP_KERNEL);
@@ -1953,7 +1953,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer;
rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
- prefetchw(rx_buffer->page);
+ prefetch_page_address(rx_buffer->page);
/* we are reusing so sync this buffer for CPU use */
dma_sync_single_range_for_cpu(rx_ring->dev,
@@ -2296,6 +2296,19 @@ void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
}
/**
+ * i40e_inc_ntc: Advance the next_to_clean index
+ * @rx_ring: Rx ring
+ **/
+static void i40e_inc_ntc(struct i40e_ring *rx_ring)
+{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+ prefetch(I40E_RX_DESC(rx_ring, ntc));
+}
+
+/**
* i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: rx descriptor ring to transact packets on
* @budget: Total limit on number of packets to process
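The i40e_inc_ntc() helper added above is the copy kept for the regular Rx
path; the AF_XDP copy added in i40e_xsk.c below deliberately drops the
descriptor prefetch, per the "not prefetching HW descriptors" item in the
cover letter. A standalone sketch of the underlying ring-advance idiom,
assuming a generic descriptor ring, with __builtin_prefetch standing in for
the kernel's prefetch():

#include <stddef.h>
#include <stdint.h>

struct ring {
	uint32_t next_to_clean;
	uint32_t count;		/* number of descriptors in the ring */
	void *desc;		/* base of the descriptor array */
};

/* A well-predicted branch instead of '%' keeps the wrap cheap; the
 * prefetch targets the descriptor the *next* iteration will read, so
 * it is warm by the time the loop comes back around. */
static inline void ring_inc_ntc(struct ring *r, size_t desc_size)
{
	uint32_t ntc = r->next_to_clean + 1;

	ntc = (ntc < r->count) ? ntc : 0;
	r->next_to_clean = ntc;
	__builtin_prefetch((char *)r->desc + (size_t)ntc * desc_size);
}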
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 703b644fd71f..66c2b92c0d10 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -110,7 +110,7 @@ enum i40e_dyn_idx_t {
*/
#define I40E_RX_HDR_SIZE I40E_RXBUFFER_256
#define I40E_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
-#define i40e_rx_desc i40e_32byte_rx_desc
+#define i40e_rx_desc i40e_16byte_rx_desc
#define I40E_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
index 667c4dc4b39f..1397dd3c1c57 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
@@ -99,19 +99,6 @@ static inline bool i40e_rx_is_programming_status(u64 qword1)
return qword1 & I40E_RXD_QW1_LENGTH_SPH_MASK;
}
-/**
- * i40e_inc_ntc: Advance the next_to_clean index
- * @rx_ring: Rx ring
- **/
-static inline void i40e_inc_ntc(struct i40e_ring *rx_ring)
-{
- u32 ntc = rx_ring->next_to_clean + 1;
-
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
- prefetch(I40E_RX_DESC(rx_ring, ntc));
-}
-
void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring);
void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring);
bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 52410d609ba1..97d29df65f9e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -628,7 +628,7 @@ union i40e_16byte_rx_desc {
__le64 hdr_addr; /* Header buffer address */
} read;
struct {
- struct {
+ struct i40e_16b_rx_wb_qw0 {
struct {
union {
__le16 mirroring_status;
@@ -647,6 +647,9 @@ union i40e_16byte_rx_desc {
__le64 status_error_len;
} qword1;
} wb; /* writeback */
+ struct {
+ u64 qword[2];
+ } raw;
};
union i40e_32byte_rx_desc {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 2a1153d8957b..6acede0acdca 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -258,6 +258,18 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
}
/**
+ * i40e_inc_ntc: Advance the next_to_clean index
+ * @rx_ring: Rx ring
+ **/
+static void i40e_inc_ntc(struct i40e_ring *rx_ring)
+{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+}
+
+/**
* i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring
* @rx_ring: Rx ring
* @budget: NAPI budget
@@ -269,8 +281,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
unsigned int xdp_res, xdp_xmit = 0;
- bool failure = false;
struct sk_buff *skb;
+ bool failure;
while (likely(total_rx_packets < (unsigned int)budget)) {
union i40e_rx_desc *rx_desc;
@@ -278,13 +290,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
unsigned int size;
u64 qword;
- if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
- failure = failure ||
- !i40e_alloc_rx_buffers_zc(rx_ring,
- cleaned_count);
- cleaned_count = 0;
- }
-
rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
@@ -359,6 +364,9 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
napi_gro_receive(&rx_ring->q_vector->napi, skb);
}
+ if (cleaned_count >= I40E_RX_BUFFER_WRITE)
+ failure = !i40e_alloc_rx_buffers_zc(rx_ring, cleaned_count);
+
i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
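The i40e_clean_rx_irq_zc() change above moves the refill out of the
per-descriptor loop: the threshold test no longer runs on every iteration, and
buffers are replenished at most once per NAPI poll, after the budget is
consumed. A standalone sketch of the new shape, where alloc_bufs() and
process_one() are hypothetical stand-ins for i40e_alloc_rx_buffers_zc() and
the per-descriptor work; note that failure is initialized here so the return
value stays well-defined when the threshold is never reached:

#include <stdbool.h>

#define RX_BUFFER_WRITE 32	/* refill threshold; i40e uses 32 */

extern bool alloc_bufs(unsigned int n);	/* stand-in refill helper */
extern bool process_one(void);		/* stand-in: one descriptor */

static bool clean_rx(unsigned int budget, unsigned int cleaned_count)
{
	unsigned int packets = 0;
	bool failure = false;	/* stays false if the threshold is not hit */

	/* Hot loop now does only per-descriptor work. */
	while (packets < budget && process_one()) {
		packets++;
		cleaned_count++;
	}

	/* Refill at most once per poll, after the loop. */
	if (cleaned_count >= RX_BUFFER_WRITE)
		failure = !alloc_bufs(cleaned_count);

	return failure;
}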
diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h
index 13eafebf3549..b83a3f944f28 100644
--- a/include/linux/prefetch.h
+++ b/include/linux/prefetch.h
@@ -15,6 +15,7 @@
#include <asm/processor.h>
#include <asm/cache.h>
+struct page;
/*
prefetch(x) attempts to pre-emptively get the memory pointed to
by address "x" into the CPU L1 cache.
@@ -62,4 +63,11 @@ static inline void prefetch_range(void *addr, size_t len)
#endif
}
+static inline void prefetch_page_address(struct page *page)
+{
+#if defined(WANT_PAGE_VIRTUAL) || defined(HASHED_PAGE_VIRTUAL)
+ prefetch(page);
+#endif
+}
+
#endif
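The guard in prefetch_page_address() exists because page_address() only reads
data tied to the struct page on configurations that store or hash the page's
virtual address (WANT_PAGE_VIRTUAL or HASHED_PAGE_VIRTUAL); on common
configurations such as the x86-64 direct map it is pure pointer arithmetic, so
warming the struct page cache line would be wasted work. This is what lets the
i40e_get_rx_buffer() hunk in i40e_txrx.c replace the unconditional
prefetchw(rx_buffer->page). A hypothetical standalone restatement, where
my_prefetch_page_address is an invented name and __builtin_prefetch stands in
for the kernel's prefetch():

struct page;	/* opaque forward declaration, as in the header itself */

static inline void my_prefetch_page_address(struct page *page)
{
#if defined(WANT_PAGE_VIRTUAL) || defined(HASHED_PAGE_VIRTUAL)
	__builtin_prefetch(page);	/* warm the line page_address() reads */
#else
	(void)page;	/* page_address() is pure arithmetic here; skip */
#endif
}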