summaryrefslogtreecommitdiffstats
path: root/drivers/net
diff options
context:
space:
mode:
authorBen Hutchings <bhutchings@solarflare.com>2013-08-29 19:19:29 +0100
committerBen Hutchings <bhutchings@solarflare.com>2013-08-29 19:19:29 +0100
commit8127d661e77f5ec410093bce411f540afa34593f (patch)
treee2aeec27754a0656cd8093cd51d7ef2a3450bf1b /drivers/net
parent4c75b43a7795671a52a002034d370ea1352f95c8 (diff)
sfc: Add support for Solarflare SFC9100 family
This adds support for the EF10 network controller architecture and the SFC9100 family, starting with SFC9120 'Farmingdale', and bumps the driver version to 4.0. New features in the SFC9100 family include: - Flexible allocation of internal resources to PCIe physical and virtual functions under firmware control - RX event merging to reduce DMA writes at high packet rates - Integrated RX timestamping - PIO buffers for lower TX latency - Firmware-driven data path that supports additional offload features and filter types - Delivery of packets between functions and to multiple recipients, allowing firmware to implement a vswitch - Multiple RX flow hash (RSS) contexts with their own hash keys and indirection tables - 40G MAC (single port only) ...not all of which are enabled in this initial driver or the initial firmware release. Much of the new code is by Jon Cooper. Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Diffstat (limited to 'drivers/net')
-rw-r--r--drivers/net/ethernet/sfc/Kconfig9
-rw-r--r--drivers/net/ethernet/sfc/Makefile4
-rw-r--r--drivers/net/ethernet/sfc/bitfield.h4
-rw-r--r--drivers/net/ethernet/sfc/ef10.c3043
-rw-r--r--drivers/net/ethernet/sfc/efx.c34
-rw-r--r--drivers/net/ethernet/sfc/ethtool.c2
-rw-r--r--drivers/net/ethernet/sfc/mcdi.c66
-rw-r--r--drivers/net/ethernet/sfc/mcdi.h8
-rw-r--r--drivers/net/ethernet/sfc/mcdi_port.c21
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h6
-rw-r--r--drivers/net/ethernet/sfc/nic.h79
-rw-r--r--drivers/net/ethernet/sfc/workarounds.h6
12 files changed, 3260 insertions, 22 deletions
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 4136ccc4a954..8b7152565c5e 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -1,5 +1,5 @@
config SFC
- tristate "Solarflare SFC4000/SFC9000-family support"
+ tristate "Solarflare SFC4000/SFC9000/SFC9100-family support"
depends on PCI
select MDIO
select CRC32
@@ -8,12 +8,13 @@ config SFC
select PTP_1588_CLOCK
---help---
This driver supports 10-gigabit Ethernet cards based on
- the Solarflare SFC4000 and SFC9000-family controllers.
+ the Solarflare SFC4000, SFC9000-family and SFC9100-family
+ controllers.
To compile this driver as a module, choose M here. The module
will be called sfc.
config SFC_MTD
- bool "Solarflare SFC4000/SFC9000-family MTD support"
+ bool "Solarflare SFC4000/SFC9000/SFC9100-family MTD support"
depends on SFC && MTD && !(SFC=y && MTD=m)
default y
---help---
@@ -21,7 +22,7 @@ config SFC_MTD
(e.g. /dev/mtd1). This is required to update the firmware or
the boot configuration under Linux.
config SFC_MCDI_MON
- bool "Solarflare SFC9000-family hwmon support"
+ bool "Solarflare SFC9000/SFC9100-family hwmon support"
depends on SFC && HWMON && !(SFC=y && HWMON=m)
default y
---help---
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index a61272661a73..3a83c0dca8e6 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -1,5 +1,5 @@
-sfc-y += efx.o nic.o farch.o falcon.o siena.o tx.o rx.o \
- selftest.o ethtool.o qt202x_phy.o mdio_10g.o \
+sfc-y += efx.o nic.o farch.o falcon.o siena.o ef10.o tx.o \
+ rx.o selftest.o ethtool.o qt202x_phy.o mdio_10g.o \
tenxpress.o txc43128_phy.o falcon_boards.o \
mcdi.o mcdi_port.o mcdi_mon.o ptp.o
sfc-$(CONFIG_SFC_MTD) += mtd.o
diff --git a/drivers/net/ethernet/sfc/bitfield.h b/drivers/net/ethernet/sfc/bitfield.h
index 5400a33f254f..f45b0db94694 100644
--- a/drivers/net/ethernet/sfc/bitfield.h
+++ b/drivers/net/ethernet/sfc/bitfield.h
@@ -29,6 +29,10 @@
/* Lowest bit numbers and widths */
#define EFX_DUMMY_FIELD_LBN 0
#define EFX_DUMMY_FIELD_WIDTH 0
+#define EFX_WORD_0_LBN 0
+#define EFX_WORD_0_WIDTH 16
+#define EFX_WORD_1_LBN 16
+#define EFX_WORD_1_WIDTH 16
#define EFX_DWORD_0_LBN 0
#define EFX_DWORD_0_WIDTH 32
#define EFX_DWORD_1_LBN 32
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
new file mode 100644
index 000000000000..5f42313b4965
--- /dev/null
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -0,0 +1,3043 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2012-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include "ef10_regs.h"
+#include "io.h"
+#include "mcdi.h"
+#include "mcdi_pcol.h"
+#include "nic.h"
+#include "workarounds.h"
+#include <linux/in.h>
+#include <linux/jhash.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+/* Hardware control for EF10 architecture including 'Huntington'. */
+
+#define EFX_EF10_DRVGEN_EV 7
+enum {
+ EFX_EF10_TEST = 1,
+ EFX_EF10_REFILL,
+};
+
+/* The reserved RSS context value */
+#define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff
+
+/* The filter table(s) are managed by firmware and we have write-only
+ * access. When removing filters we must identify them to the
+ * firmware by a 64-bit handle, but this is too wide for Linux kernel
+ * interfaces (32-bit for RX NFC, 16-bit for RFS). Also, we need to
+ * be able to tell in advance whether a requested insertion will
+ * replace an existing filter. Therefore we maintain a software hash
+ * table, which should be at least as large as the hardware hash
+ * table.
+ *
+ * Huntington has a single 8K filter table shared between all filter
+ * types and both ports.
+ */
+#define HUNT_FILTER_TBL_ROWS 8192
+
+struct efx_ef10_filter_table {
+/* The RX match field masks supported by this fw & hw, in order of priority */
+ enum efx_filter_match_flags rx_match_flags[
+ MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM];
+ unsigned int rx_match_count;
+
+ struct {
+ unsigned long spec; /* pointer to spec plus flag bits */
+/* BUSY flag indicates that an update is in progress. STACK_OLD is
+ * used to mark and sweep stack-owned MAC filters.
+ */
+#define EFX_EF10_FILTER_FLAG_BUSY 1UL
+#define EFX_EF10_FILTER_FLAG_STACK_OLD 2UL
+#define EFX_EF10_FILTER_FLAGS 3UL
+ u64 handle; /* firmware handle */
+ } *entry;
+ wait_queue_head_t waitq;
+/* Shadow of net_device address lists, guarded by mac_lock */
+#define EFX_EF10_FILTER_STACK_UC_MAX 32
+#define EFX_EF10_FILTER_STACK_MC_MAX 256
+ struct {
+ u8 addr[ETH_ALEN];
+ u16 id;
+ } stack_uc_list[EFX_EF10_FILTER_STACK_UC_MAX],
+ stack_mc_list[EFX_EF10_FILTER_STACK_MC_MAX];
+ int stack_uc_count; /* negative for PROMISC */
+ int stack_mc_count; /* negative for PROMISC/ALLMULTI */
+};
+
+/* An arbitrary search limit for the software hash table */
+#define EFX_EF10_FILTER_SEARCH_LIMIT 200
+
+static void efx_ef10_rx_push_indir_table(struct efx_nic *efx);
+static void efx_ef10_rx_free_indir_table(struct efx_nic *efx);
+static void efx_ef10_filter_table_remove(struct efx_nic *efx);
+
+static int efx_ef10_get_warm_boot_count(struct efx_nic *efx)
+{
+ efx_dword_t reg;
+
+ efx_readd(efx, &reg, ER_DZ_BIU_MC_SFT_STATUS);
+ return EFX_DWORD_FIELD(reg, EFX_WORD_1) == 0xb007 ?
+ EFX_DWORD_FIELD(reg, EFX_WORD_0) : -EIO;
+}
+
+static unsigned int efx_ef10_mem_map_size(struct efx_nic *efx)
+{
+ return resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]);
+}
+
+static int efx_ef10_init_capabilities(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_OUT_LEN);
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_CAPABILITIES_IN_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_CAPABILITIES, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+
+ if (outlen >= sizeof(outbuf)) {
+ nic_data->datapath_caps =
+ MCDI_DWORD(outbuf, GET_CAPABILITIES_OUT_FLAGS1);
+ if (!(nic_data->datapath_caps &
+ (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))) {
+ netif_err(efx, drv, efx->net_dev,
+ "Capabilities don't indicate TSO support.\n");
+ return -ENODEV;
+ }
+ }
+
+ return 0;
+}
+
+static int efx_ef10_get_sysclk_freq(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CLOCK_OUT_LEN);
+ int rc;
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_CLOCK, NULL, 0,
+ outbuf, sizeof(outbuf), NULL);
+ if (rc)
+ return rc;
+ rc = MCDI_DWORD(outbuf, GET_CLOCK_OUT_SYS_FREQ);
+ return rc > 0 ? rc : -ERANGE;
+}
+
+static int efx_ef10_get_mac_address(struct efx_nic *efx, u8 *mac_address)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_MAC_ADDRESSES_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_MAC_ADDRESSES_IN_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_MAC_ADDRESSES, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+ if (outlen < MC_CMD_GET_MAC_ADDRESSES_OUT_LEN)
+ return -EIO;
+
+ memcpy(mac_address,
+ MCDI_PTR(outbuf, GET_MAC_ADDRESSES_OUT_MAC_ADDR_BASE), ETH_ALEN);
+ return 0;
+}
+
+static int efx_ef10_probe(struct efx_nic *efx)
+{
+ struct efx_ef10_nic_data *nic_data;
+ int i, rc;
+
+ /* We can have one VI for each 8K region. However we need
+ * multiple TX queues per channel.
+ */
+ efx->max_channels =
+ min_t(unsigned int,
+ EFX_MAX_CHANNELS,
+ resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]) /
+ (EFX_VI_PAGE_SIZE * EFX_TXQ_TYPES));
+ BUG_ON(efx->max_channels == 0);
+
+ nic_data = kzalloc(sizeof(*nic_data), GFP_KERNEL);
+ if (!nic_data)
+ return -ENOMEM;
+ efx->nic_data = nic_data;
+
+ rc = efx_nic_alloc_buffer(efx, &nic_data->mcdi_buf,
+ 8 + MCDI_CTL_SDU_LEN_MAX_V2, GFP_KERNEL);
+ if (rc)
+ goto fail1;
+
+ /* Get the MC's warm boot count. In case it's rebooting right
+ * now, be prepared to retry.
+ */
+ i = 0;
+ for (;;) {
+ rc = efx_ef10_get_warm_boot_count(efx);
+ if (rc >= 0)
+ break;
+ if (++i == 5)
+ goto fail2;
+ ssleep(1);
+ }
+ nic_data->warm_boot_count = rc;
+
+ nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+
+ /* In case we're recovering from a crash (kexec), we want to
+ * cancel any outstanding request by the previous user of this
+ * function. We send a special message using the least
+ * significant bits of the 'high' (doorbell) register.
+ */
+ _efx_writed(efx, cpu_to_le32(1), ER_DZ_MC_DB_HWRD);
+
+ rc = efx_mcdi_init(efx);
+ if (rc)
+ goto fail2;
+
+ /* Reset (most) configuration for this function */
+ rc = efx_mcdi_reset(efx, RESET_TYPE_ALL);
+ if (rc)
+ goto fail3;
+
+ /* Enable event logging */
+ rc = efx_mcdi_log_ctrl(efx, true, false, 0);
+ if (rc)
+ goto fail3;
+
+ rc = efx_ef10_init_capabilities(efx);
+ if (rc < 0)
+ goto fail3;
+
+ efx->rx_packet_len_offset =
+ ES_DZ_RX_PREFIX_PKTLEN_OFST - ES_DZ_RX_PREFIX_SIZE;
+
+ if (!(nic_data->datapath_caps &
+ (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_PREFIX_LEN_14_LBN))) {
+ netif_err(efx, probe, efx->net_dev,
+ "current firmware does not support an RX prefix\n");
+ rc = -ENODEV;
+ goto fail3;
+ }
+
+ rc = efx_mcdi_port_get_number(efx);
+ if (rc < 0)
+ goto fail3;
+ efx->port_num = rc;
+
+ rc = efx_ef10_get_mac_address(efx, efx->net_dev->perm_addr);
+ if (rc)
+ goto fail3;
+
+ rc = efx_ef10_get_sysclk_freq(efx);
+ if (rc < 0)
+ goto fail3;
+ efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */
+
+ /* Check whether firmware supports bug 35388 workaround */
+ rc = efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG35388, true);
+ if (rc == 0)
+ nic_data->workaround_35388 = true;
+ else if (rc != -ENOSYS && rc != -ENOENT)
+ goto fail3;
+ netif_dbg(efx, probe, efx->net_dev,
+ "workaround for bug 35388 is %sabled\n",
+ nic_data->workaround_35388 ? "en" : "dis");
+
+ rc = efx_mcdi_mon_probe(efx);
+ if (rc)
+ goto fail3;
+
+ efx_ptp_probe(efx);
+
+ return 0;
+
+fail3:
+ efx_mcdi_fini(efx);
+fail2:
+ efx_nic_free_buffer(efx, &nic_data->mcdi_buf);
+fail1:
+ kfree(nic_data);
+ efx->nic_data = NULL;
+ return rc;
+}
+
+static int efx_ef10_free_vis(struct efx_nic *efx)
+{
+ int rc = efx_mcdi_rpc(efx, MC_CMD_FREE_VIS, NULL, 0, NULL, 0, NULL);
+
+ /* -EALREADY means nothing to free, so ignore */
+ if (rc == -EALREADY)
+ rc = 0;
+ return rc;
+}
+
+static void efx_ef10_remove(struct efx_nic *efx)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ int rc;
+
+ efx_mcdi_mon_remove(efx);
+
+ /* This needs to be after efx_ptp_remove_channel() with no filters */
+ efx_ef10_rx_free_indir_table(efx);
+
+ rc = efx_ef10_free_vis(efx);
+ WARN_ON(rc != 0);
+
+ efx_mcdi_fini(efx);
+ efx_nic_free_buffer(efx, &nic_data->mcdi_buf);
+ kfree(nic_data);
+}
+
+static int efx_ef10_alloc_vis(struct efx_nic *efx,
+ unsigned int min_vis, unsigned int max_vis)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN);
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis);
+ MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis);
+ rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc != 0)
+ return rc;
+
+ if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN)
+ return -EIO;
+
+ netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n",
+ MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE));
+
+ nic_data->vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE);
+ nic_data->n_allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT);
+ return 0;
+}
+
+static int efx_ef10_dimension_resources(struct efx_nic *efx)
+{
+ unsigned int n_vis =
+ max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES);
+
+ return efx_ef10_alloc_vis(efx, n_vis, n_vis);
+}
+
+static int efx_ef10_init_nic(struct efx_nic *efx)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ int rc;
+
+ if (nic_data->must_realloc_vis) {
+ /* We cannot let the number of VIs change now */
+ rc = efx_ef10_alloc_vis(efx, nic_data->n_allocated_vis,
+ nic_data->n_allocated_vis);
+ if (rc)
+ return rc;
+ nic_data->must_realloc_vis = false;
+ }
+
+ efx_ef10_rx_push_indir_table(efx);
+ return 0;
+}
+
+static int efx_ef10_map_reset_flags(u32 *flags)
+{
+ enum {
+ EF10_RESET_PORT = ((ETH_RESET_MAC | ETH_RESET_PHY) <<
+ ETH_RESET_SHARED_SHIFT),
+ EF10_RESET_MC = ((ETH_RESET_DMA | ETH_RESET_FILTER |
+ ETH_RESET_OFFLOAD | ETH_RESET_MAC |
+ ETH_RESET_PHY | ETH_RESET_MGMT) <<
+ ETH_RESET_SHARED_SHIFT)
+ };
+
+ /* We assume for now that our PCI function is permitted to
+ * reset everything.
+ */
+
+ if ((*flags & EF10_RESET_MC) == EF10_RESET_MC) {
+ *flags &= ~EF10_RESET_MC;
+ return RESET_TYPE_WORLD;
+ }
+
+ if ((*flags & EF10_RESET_PORT) == EF10_RESET_PORT) {
+ *flags &= ~EF10_RESET_PORT;
+ return RESET_TYPE_ALL;
+ }
+
+ /* no invisible reset implemented */
+
+ return -EINVAL;
+}
+
+#define EF10_DMA_STAT(ext_name, mcdi_name) \
+ [EF10_STAT_ ## ext_name] = \
+ { #ext_name, 64, 8 * MC_CMD_MAC_ ## mcdi_name }
+#define EF10_DMA_INVIS_STAT(int_name, mcdi_name) \
+ [EF10_STAT_ ## int_name] = \
+ { NULL, 64, 8 * MC_CMD_MAC_ ## mcdi_name }
+#define EF10_OTHER_STAT(ext_name) \
+ [EF10_STAT_ ## ext_name] = { #ext_name, 0, 0 }
+
+static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = {
+ EF10_DMA_STAT(tx_bytes, TX_BYTES),
+ EF10_DMA_STAT(tx_packets, TX_PKTS),
+ EF10_DMA_STAT(tx_pause, TX_PAUSE_PKTS),
+ EF10_DMA_STAT(tx_control, TX_CONTROL_PKTS),
+ EF10_DMA_STAT(tx_unicast, TX_UNICAST_PKTS),
+ EF10_DMA_STAT(tx_multicast, TX_MULTICAST_PKTS),
+ EF10_DMA_STAT(tx_broadcast, TX_BROADCAST_PKTS),
+ EF10_DMA_STAT(tx_lt64, TX_LT64_PKTS),
+ EF10_DMA_STAT(tx_64, TX_64_PKTS),
+ EF10_DMA_STAT(tx_65_to_127, TX_65_TO_127_PKTS),
+ EF10_DMA_STAT(tx_128_to_255, TX_128_TO_255_PKTS),
+ EF10_DMA_STAT(tx_256_to_511, TX_256_TO_511_PKTS),
+ EF10_DMA_STAT(tx_512_to_1023, TX_512_TO_1023_PKTS),
+ EF10_DMA_STAT(tx_1024_to_15xx, TX_1024_TO_15XX_PKTS),
+ EF10_DMA_STAT(tx_15xx_to_jumbo, TX_15XX_TO_JUMBO_PKTS),
+ EF10_DMA_STAT(rx_bytes, RX_BYTES),
+ EF10_DMA_INVIS_STAT(rx_bytes_minus_good_bytes, RX_BAD_BYTES),
+ EF10_OTHER_STAT(rx_good_bytes),
+ EF10_OTHER_STAT(rx_bad_bytes),
+ EF10_DMA_STAT(rx_packets, RX_PKTS),
+ EF10_DMA_STAT(rx_good, RX_GOOD_PKTS),
+ EF10_DMA_STAT(rx_bad, RX_BAD_FCS_PKTS),
+ EF10_DMA_STAT(rx_pause, RX_PAUSE_PKTS),
+ EF10_DMA_STAT(rx_control, RX_CONTROL_PKTS),
+ EF10_DMA_STAT(rx_unicast, RX_UNICAST_PKTS),
+ EF10_DMA_STAT(rx_multicast, RX_MULTICAST_PKTS),
+ EF10_DMA_STAT(rx_broadcast, RX_BROADCAST_PKTS),
+ EF10_DMA_STAT(rx_lt64, RX_UNDERSIZE_PKTS),
+ EF10_DMA_STAT(rx_64, RX_64_PKTS),
+ EF10_DMA_STAT(rx_65_to_127, RX_65_TO_127_PKTS),
+ EF10_DMA_STAT(rx_128_to_255, RX_128_TO_255_PKTS),
+ EF10_DMA_STAT(rx_256_to_511, RX_256_TO_511_PKTS),
+ EF10_DMA_STAT(rx_512_to_1023, RX_512_TO_1023_PKTS),
+ EF10_DMA_STAT(rx_1024_to_15xx, RX_1024_TO_15XX_PKTS),
+ EF10_DMA_STAT(rx_15xx_to_jumbo, RX_15XX_TO_JUMBO_PKTS),
+ EF10_DMA_STAT(rx_gtjumbo, RX_GTJUMBO_PKTS),
+ EF10_DMA_STAT(rx_bad_gtjumbo, RX_JABBER_PKTS),
+ EF10_DMA_STAT(rx_overflow, RX_OVERFLOW_PKTS),
+ EF10_DMA_STAT(rx_align_error, RX_ALIGN_ERROR_PKTS),
+ EF10_DMA_STAT(rx_length_error, RX_LENGTH_ERROR_PKTS),
+ EF10_DMA_STAT(rx_nodesc_drops, RX_NODESC_DROPS),
+};
+
+#define HUNT_COMMON_STAT_MASK ((1ULL << EF10_STAT_tx_bytes) | \
+ (1ULL << EF10_STAT_tx_packets) | \
+ (1ULL << EF10_STAT_tx_pause) | \
+ (1ULL << EF10_STAT_tx_unicast) | \
+ (1ULL << EF10_STAT_tx_multicast) | \
+ (1ULL << EF10_STAT_tx_broadcast) | \
+ (1ULL << EF10_STAT_rx_bytes) | \
+ (1ULL << EF10_STAT_rx_bytes_minus_good_bytes) | \
+ (1ULL << EF10_STAT_rx_good_bytes) | \
+ (1ULL << EF10_STAT_rx_bad_bytes) | \
+ (1ULL << EF10_STAT_rx_packets) | \
+ (1ULL << EF10_STAT_rx_good) | \
+ (1ULL << EF10_STAT_rx_bad) | \
+ (1ULL << EF10_STAT_rx_pause) | \
+ (1ULL << EF10_STAT_rx_control) | \
+ (1ULL << EF10_STAT_rx_unicast) | \
+ (1ULL << EF10_STAT_rx_multicast) | \
+ (1ULL << EF10_STAT_rx_broadcast) | \
+ (1ULL << EF10_STAT_rx_lt64) | \
+ (1ULL << EF10_STAT_rx_64) | \
+ (1ULL << EF10_STAT_rx_65_to_127) | \
+ (1ULL << EF10_STAT_rx_128_to_255) | \
+ (1ULL << EF10_STAT_rx_256_to_511) | \
+ (1ULL << EF10_STAT_rx_512_to_1023) | \
+ (1ULL << EF10_STAT_rx_1024_to_15xx) | \
+ (1ULL << EF10_STAT_rx_15xx_to_jumbo) | \
+ (1ULL << EF10_STAT_rx_gtjumbo) | \
+ (1ULL << EF10_STAT_rx_bad_gtjumbo) | \
+ (1ULL << EF10_STAT_rx_overflow) | \
+ (1ULL << EF10_STAT_rx_nodesc_drops))
+
+/* These statistics are only provided by the 10G MAC. For a 10G/40G
+ * switchable port we do not expose these because they might not
+ * include all the packets they should.
+ */
+#define HUNT_10G_ONLY_STAT_MASK ((1ULL << EF10_STAT_tx_control) | \
+ (1ULL << EF10_STAT_tx_lt64) | \
+ (1ULL << EF10_STAT_tx_64) | \
+ (1ULL << EF10_STAT_tx_65_to_127) | \
+ (1ULL << EF10_STAT_tx_128_to_255) | \
+ (1ULL << EF10_STAT_tx_256_to_511) | \
+ (1ULL << EF10_STAT_tx_512_to_1023) | \
+ (1ULL << EF10_STAT_tx_1024_to_15xx) | \
+ (1ULL << EF10_STAT_tx_15xx_to_jumbo))
+
+/* These statistics are only provided by the 40G MAC. For a 10G/40G
+ * switchable port we do expose these because the errors will otherwise
+ * be silent.
+ */
+#define HUNT_40G_EXTRA_STAT_MASK ((1ULL << EF10_STAT_rx_align_error) | \
+ (1ULL << EF10_STAT_rx_length_error))
+
+#if BITS_PER_LONG == 64
+#define STAT_MASK_BITMAP(bits) (bits)
+#else
+#define STAT_MASK_BITMAP(bits) (bits) & 0xffffffff, (bits) >> 32
+#endif
+
+static const unsigned long *efx_ef10_stat_mask(struct efx_nic *efx)
+{
+ static const unsigned long hunt_40g_stat_mask[] = {
+ STAT_MASK_BITMAP(HUNT_COMMON_STAT_MASK |
+ HUNT_40G_EXTRA_STAT_MASK)
+ };
+ static const unsigned long hunt_10g_only_stat_mask[] = {
+ STAT_MASK_BITMAP(HUNT_COMMON_STAT_MASK |
+ HUNT_10G_ONLY_STAT_MASK)
+ };
+ u32 port_caps = efx_mcdi_phy_get_caps(efx);
+
+ if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
+ return hunt_40g_stat_mask;
+ else
+ return hunt_10g_only_stat_mask;
+}
+
+static size_t efx_ef10_describe_stats(struct efx_nic *efx, u8 *names)
+{
+ return efx_nic_describe_stats(efx_ef10_stat_desc, EF10_STAT_COUNT,
+ efx_ef10_stat_mask(efx), names);
+}
+
+static int efx_ef10_try_update_nic_stats(struct efx_nic *efx)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ const unsigned long *stats_mask = efx_ef10_stat_mask(efx);
+ __le64 generation_start, generation_end;
+ u64 *stats = nic_data->stats;
+ __le64 *dma_stats;
+
+ dma_stats = efx->stats_buffer.addr;
+ nic_data = efx->nic_data;
+
+ generation_end = dma_stats[MC_CMD_MAC_GENERATION_END];
+ if (generation_end == EFX_MC_STATS_GENERATION_INVALID)
+ return 0;
+ rmb();
+ efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, stats_mask,
+ stats, efx->stats_buffer.addr, false);
+ generation_start = dma_stats[MC_CMD_MAC_GENERATION_START];
+ if (generation_end != generation_start)
+ return -EAGAIN;
+
+ /* Update derived statistics */
+ stats[EF10_STAT_rx_good_bytes] =
+ stats[EF10_STAT_rx_bytes] -
+ stats[EF10_STAT_rx_bytes_minus_good_bytes];
+ efx_update_diff_stat(&stats[EF10_STAT_rx_bad_bytes],
+ stats[EF10_STAT_rx_bytes_minus_good_bytes]);
+
+ return 0;
+}
+
+
+static size_t efx_ef10_update_stats(struct efx_nic *efx, u64 *full_stats,
+ struct rtnl_link_stats64 *core_stats)
+{
+ const unsigned long *mask = efx_ef10_stat_mask(efx);
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ u64 *stats = nic_data->stats;
+ size_t stats_count = 0, index;
+ int retry;
+
+ /* If we're unlucky enough to read statistics during the DMA, wait
+ * up to 10ms for it to finish (typically takes <500us)
+ */
+ for (retry = 0; retry < 100; ++retry) {
+ if (efx_ef10_try_update_nic_stats(efx) == 0)
+ break;
+ udelay(100);
+ }
+
+ if (full_stats) {
+ for_each_set_bit(index, mask, EF10_STAT_COUNT) {
+ if (efx_ef10_stat_desc[index].name) {
+ *full_stats++ = stats[index];
+ ++stats_count;
+ }
+ }
+ }
+
+ if (core_stats) {
+ core_stats->rx_packets = stats[EF10_STAT_rx_packets];
+ core_stats->tx_packets = stats[EF10_STAT_tx_packets];
+ core_stats->rx_bytes = stats[EF10_STAT_rx_bytes];
+ core_stats->tx_bytes = stats[EF10_STAT_tx_bytes];
+ core_stats->rx_dropped = stats[EF10_STAT_rx_nodesc_drops];
+ core_stats->multicast = stats[EF10_STAT_rx_multicast];
+ core_stats->rx_length_errors =
+ stats[EF10_STAT_rx_gtjumbo] +
+ stats[EF10_STAT_rx_length_error];
+ core_stats->rx_crc_errors = stats[EF10_STAT_rx_bad];
+ core_stats->rx_frame_errors = stats[EF10_STAT_rx_align_error];
+ core_stats->rx_fifo_errors = stats[EF10_STAT_rx_overflow];
+ core_stats->rx_errors = (core_stats->rx_length_errors +
+ core_stats->rx_crc_errors +
+ core_stats->rx_frame_errors);
+ }
+
+ return stats_count;
+}
+
+static void efx_ef10_push_irq_moderation(struct efx_channel *channel)
+{
+ struct efx_nic *efx = channel->efx;
+ unsigned int mode, value;
+ efx_dword_t timer_cmd;
+
+ if (channel->irq_moderation) {
+ mode = 3;
+ value = channel->irq_moderation - 1;
+ } else {
+ mode = 0;
+ value = 0;
+ }
+
+ if (EFX_EF10_WORKAROUND_35388(efx)) {
+ EFX_POPULATE_DWORD_3(timer_cmd, ERF_DD_EVQ_IND_TIMER_FLAGS,
+ EFE_DD_EVQ_IND_TIMER_FLAGS,
+ ERF_DD_EVQ_IND_TIMER_MODE, mode,
+ ERF_DD_EVQ_IND_TIMER_VAL, value);
+ efx_writed_page(efx, &timer_cmd, ER_DD_EVQ_INDIRECT,
+ channel->channel);
+ } else {
+ EFX_POPULATE_DWORD_2(timer_cmd, ERF_DZ_TC_TIMER_MODE, mode,
+ ERF_DZ_TC_TIMER_VAL, value);
+ efx_writed_page(efx, &timer_cmd, ER_DZ_EVQ_TMR,
+ channel->channel);
+ }
+}
+
+static void efx_ef10_get_wol(struct efx_nic *efx, struct ethtool_wolinfo *wol)
+{
+ wol->supported = 0;
+ wol->wolopts = 0;
+ memset(&wol->sopass, 0, sizeof(wol->sopass));
+}
+
+static int efx_ef10_set_wol(struct efx_nic *efx, u32 type)
+{
+ if (type != 0)
+ return -EINVAL;
+ return 0;
+}
+
+static void efx_ef10_mcdi_request(struct efx_nic *efx,
+ const efx_dword_t *hdr, size_t hdr_len,
+ const efx_dword_t *sdu, size_t sdu_len)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ u8 *pdu = nic_data->mcdi_buf.addr;
+
+ memcpy(pdu, hdr, hdr_len);
+ memcpy(pdu + hdr_len, sdu, sdu_len);
+ wmb();
+
+ /* The hardware provides 'low' and 'high' (doorbell) registers
+ * for passing the 64-bit address of an MCDI request to
+ * firmware. However the dwords are swapped by firmware. The
+ * least significant bits of the doorbell are then 0 for all
+ * MCDI requests due to alignment.
+ */
+ _efx_writed(efx, cpu_to_le32((u64)nic_data->mcdi_buf.dma_addr >> 32),
+ ER_DZ_MC_DB_LWRD);
+ _efx_writed(efx, cpu_to_le32((u32)nic_data->mcdi_buf.dma_addr),
+ ER_DZ_MC_DB_HWRD);
+}
+
+static bool efx_ef10_mcdi_poll_response(struct efx_nic *efx)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ const efx_dword_t hdr = *(const efx_dword_t *)nic_data->mcdi_buf.addr;
+
+ rmb();
+ return EFX_DWORD_FIELD(hdr, MCDI_HEADER_RESPONSE);
+}
+
+static void
+efx_ef10_mcdi_read_response(struct efx_nic *efx, efx_dword_t *outbuf,
+ size_t offset, size_t outlen)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ const u8 *pdu = nic_data->mcdi_buf.addr;
+
+ memcpy(outbuf, pdu + offset, outlen);
+}
+
+static int efx_ef10_mcdi_poll_reboot(struct efx_nic *efx)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ int rc;
+
+ rc = efx_ef10_get_warm_boot_count(efx);
+ if (rc < 0) {
+ /* The firmware is presumably in the process of
+ * rebooting. However, we are supposed to report each
+ * reboot just once, so we must only do that once we
+ * can read and store the updated warm boot count.
+ */
+ return 0;
+ }
+
+ if (rc == nic_data->warm_boot_count)
+ return 0;
+
+ nic_data->warm_boot_count = rc;
+
+ /* All our allocations have been reset */
+ nic_data->must_realloc_vis = true;
+ nic_data->must_restore_filters = true;
+ nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+
+ return -EIO;
+}
+
+/* Handle an MSI interrupt
+ *
+ * Handle an MSI hardware interrupt. This routine schedules event
+ * queue processing. No interrupt acknowledgement cycle is necessary.
+ * Also, we never need to check that the interrupt is for us, since
+ * MSI interrupts cannot be shared.
+ */
+static irqreturn_t efx_ef10_msi_interrupt(int irq, void *dev_id)
+{
+ struct efx_msi_context *context = dev_id;
+ struct efx_nic *efx = context->efx;
+
+ netif_vdbg(efx, intr, efx->net_dev,
+ "IRQ %d on CPU %d\n", irq, raw_smp_processor_id());
+
+ if (likely(ACCESS_ONCE(efx->irq_soft_enabled))) {
+ /* Note test interrupts */
+ if (context->index == efx->irq_level)
+ efx->last_irq_cpu = raw_smp_processor_id();
+
+ /* Schedule processing of the channel */
+ efx_schedule_channel_irq(efx->channel[context->index]);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t efx_ef10_legacy_interrupt(int irq, void *dev_id)
+{
+ struct efx_nic *efx = dev_id;
+ bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled);
+ struct efx_channel *channel;
+ efx_dword_t reg;
+ u32 queues;
+
+ /* Read the ISR which also ACKs the interrupts */
+ efx_readd(efx, &reg, ER_DZ_BIU_INT_ISR);
+ queues = EFX_DWORD_FIELD(reg, ERF_DZ_ISR_REG);
+
+ if (queues == 0)
+ return IRQ_NONE;
+
+ if (likely(soft_enabled)) {
+ /* Note test interrupts */
+ if (queues & (1U << efx->irq_level))
+ efx->last_irq_cpu = raw_smp_processor_id();
+
+ efx_for_each_channel(channel, efx) {
+ if (queues & 1)
+ efx_schedule_channel_irq(channel);
+ queues >>= 1;
+ }
+ }
+
+ netif_vdbg(efx, intr, efx->net_dev,
+ "IRQ %d on CPU %d status " EFX_DWORD_FMT "\n",
+ irq, raw_smp_processor_id(), EFX_DWORD_VAL(reg));
+
+ return IRQ_HANDLED;
+}
+
+static void efx_ef10_irq_test_generate(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_TRIGGER_INTERRUPT_IN_LEN);
+
+ BUILD_BUG_ON(MC_CMD_TRIGGER_INTERRUPT_OUT_LEN != 0);
+
+ MCDI_SET_DWORD(inbuf, TRIGGER_INTERRUPT_IN_INTR_LEVEL, efx->irq_level);
+ (void) efx_mcdi_rpc(efx, MC_CMD_TRIGGER_INTERRUPT,
+ inbuf, sizeof(inbuf), NULL, 0, NULL);
+}
+
+static int efx_ef10_tx_probe(struct efx_tx_queue *tx_queue)
+{
+ return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd.buf,
+ (tx_queue->ptr_mask + 1) *
+ sizeof(efx_qword_t),
+ GFP_KERNEL);
+}
+
+/* This writes to the TX_DESC_WPTR and also pushes data */
+static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue,
+ const efx_qword_t *txd)
+{
+ unsigned int write_ptr;
+ efx_oword_t reg;
+
+ write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
+ EFX_POPULATE_OWORD_1(reg, ERF_DZ_TX_DESC_WPTR, write_ptr);
+ reg.qword[0] = *txd;
+ efx_writeo_page(tx_queue->efx, &reg,
+ ER_DZ_TX_DESC_UPD, tx_queue->queue);
+}
+
+static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
+ EFX_BUF_SIZE));
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_TXQ_OUT_LEN);
+ bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD;
+ size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE;
+ struct efx_channel *channel = tx_queue->channel;
+ struct efx_nic *efx = tx_queue->efx;
+ size_t inlen, outlen;
+ dma_addr_t dma_addr;
+ efx_qword_t *txd;
+ int rc;
+ int i;
+
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue);
+ MCDI_POPULATE_DWORD_2(inbuf, INIT_TXQ_IN_FLAGS,
+ INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
+ INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, EVB_PORT_ID_ASSIGNED);
+
+ dma_addr = tx_queue->txd.buf.dma_addr;
+
+ netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. %zu entries (%llx)\n",
+ tx_queue->queue, entries, (u64)dma_addr);
+
+ for (i = 0; i < entries; ++i) {
+ MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr);
+ dma_addr += EFX_BUF_SIZE;
+ }
+
+ inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+
+ /* A previous user of this TX queue might have set us up the
+ * bomb by writing a descriptor to the TX push collector but
+ * not the doorbell. (Each collector belongs to a port, not a
+ * queue or function, so cannot easily be reset.) We must
+ * attempt to push a no-op descriptor in its place.
+ */
+ tx_queue->buffer[0].flags = EFX_TX_BUF_OPTION;
+ tx_queue->insert_count = 1;
+ txd = efx_tx_desc(tx_queue, 0);
+ EFX_POPULATE_QWORD_4(*txd,
+ ESF_DZ_TX_DESC_IS_OPT, true,
+ ESF_DZ_TX_OPTION_TYPE,
+ ESE_DZ_TX_OPTION_DESC_CRC_CSUM,
+ ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload,
+ ESF_DZ_TX_OPTION_IP_CSUM, csum_offload);
+ tx_queue->write_count = 1;
+ wmb();
+ efx_ef10_push_tx_desc(tx_queue, txd);
+
+ return;
+
+fail:
+ WARN_ON(true);
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+}
+
+static void efx_ef10_tx_fini(struct efx_tx_queue *tx_queue)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_TXQ_OUT_LEN);
+ struct efx_nic *efx = tx_queue->efx;
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE,
+ tx_queue->queue);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+
+ if (rc && rc != -EALREADY)
+ goto fail;
+
+ return;
+
+fail:
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+}
+
+static void efx_ef10_tx_remove(struct efx_tx_queue *tx_queue)
+{
+ efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf);
+}
+
+/* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */
+static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue)
+{
+ unsigned int write_ptr;
+ efx_dword_t reg;
+