summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS7
-rw-r--r--drivers/net/ethernet/Kconfig1
-rw-r--r--drivers/net/ethernet/Makefile1
-rw-r--r--drivers/net/ethernet/cavium/Kconfig40
-rw-r--r--drivers/net/ethernet/cavium/Makefile5
-rw-r--r--drivers/net/ethernet/cavium/thunder/Makefile11
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic.h414
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic_main.c940
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic_reg.h213
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c601
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c1332
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.c1544
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.h381
-rw-r--r--drivers/net/ethernet/cavium/thunder/q_struct.h701
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.c966
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.h223
16 files changed, 7380 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index df106f87a3ba..d1b1d2294b6a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -921,6 +921,13 @@ M: Krzysztof Halasa <khalasa@piap.pl>
S: Maintained
F: arch/arm/mach-cns3xxx/
+ARM/CAVIUM THUNDER NETWORK DRIVER
+M: Sunil Goutham <sgoutham@cavium.com>
+M: Robert Richter <rric@kernel.org>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S: Supported
+F: drivers/net/ethernet/cavium/
+
ARM/CIRRUS LOGIC CLPS711X ARM ARCHITECTURE
M: Alexander Shiyan <shc_work@mail.ru>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index eadcb053807e..9a8308553520 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -34,6 +34,7 @@ source "drivers/net/ethernet/adi/Kconfig"
source "drivers/net/ethernet/broadcom/Kconfig"
source "drivers/net/ethernet/brocade/Kconfig"
source "drivers/net/ethernet/calxeda/Kconfig"
+source "drivers/net/ethernet/cavium/Kconfig"
source "drivers/net/ethernet/chelsio/Kconfig"
source "drivers/net/ethernet/cirrus/Kconfig"
source "drivers/net/ethernet/cisco/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 1367afcd0a8b..4395d99115a0 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_NET_BFIN) += adi/
obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/
obj-$(CONFIG_NET_VENDOR_BROCADE) += brocade/
obj-$(CONFIG_NET_CALXEDA_XGMAC) += calxeda/
+obj-$(CONFIG_NET_VENDOR_CAVIUM) += cavium/
obj-$(CONFIG_NET_VENDOR_CHELSIO) += chelsio/
obj-$(CONFIG_NET_VENDOR_CIRRUS) += cirrus/
obj-$(CONFIG_NET_VENDOR_CISCO) += cisco/
diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig
new file mode 100644
index 000000000000..6365fb4242be
--- /dev/null
+++ b/drivers/net/ethernet/cavium/Kconfig
@@ -0,0 +1,40 @@
+#
+# Cavium ethernet device configuration
+#
+
+config NET_VENDOR_CAVIUM
+ tristate "Cavium ethernet drivers"
+ depends on PCI
+ ---help---
+ Enable support for the Cavium ThunderX Network Interface
+ Controller (NIC). The NIC provides the controller and DMA
+ engines to move network traffic to/from the memory. The NIC
+ works closely with TNS, BGX and SerDes to implement the
+ functions replacing and virtualizing those of a typical
+ standalone PCIe NIC chip.
+
+ If you have a Cavium Thunder board, say Y.
+
+if NET_VENDOR_CAVIUM
+
+config THUNDER_NIC_PF
+ tristate "Thunder Physical function driver"
+ default NET_VENDOR_CAVIUM
+ select THUNDER_NIC_BGX
+ ---help---
+ This driver supports Thunder's NIC physical function.
+
+config THUNDER_NIC_VF
+ tristate "Thunder Virtual function driver"
+ default NET_VENDOR_CAVIUM
+ ---help---
+ This driver supports Thunder's NIC virtual function
+
+config THUNDER_NIC_BGX
+ tristate "Thunder MAC interface driver (BGX)"
+ default NET_VENDOR_CAVIUM
+ ---help---
+ This driver supports programming and controlling of MAC
+ interface from NIC physical function driver.
+
+endif # NET_VENDOR_CAVIUM
diff --git a/drivers/net/ethernet/cavium/Makefile b/drivers/net/ethernet/cavium/Makefile
new file mode 100644
index 000000000000..7aac4780d050
--- /dev/null
+++ b/drivers/net/ethernet/cavium/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Cavium ethernet device drivers.
+#
+
+obj-$(CONFIG_NET_VENDOR_CAVIUM) += thunder/
diff --git a/drivers/net/ethernet/cavium/thunder/Makefile b/drivers/net/ethernet/cavium/thunder/Makefile
new file mode 100644
index 000000000000..5c4615ccaa14
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for Cavium's Thunder ethernet device
+#
+
+obj-$(CONFIG_THUNDER_NIC_BGX) += thunder_bgx.o
+obj-$(CONFIG_THUNDER_NIC_PF) += nicpf.o
+obj-$(CONFIG_THUNDER_NIC_VF) += nicvf.o
+
+nicpf-y := nic_main.o
+nicvf-y := nicvf_main.o nicvf_queues.o
+nicvf-y += nicvf_ethtool.o
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
new file mode 100644
index 000000000000..9b0be527909b
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -0,0 +1,414 @@
+/*
+ * Copyright (C) 2015 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef NIC_H
+#define NIC_H
+
+#include <linux/netdevice.h>
+#include <linux/interrupt.h>
+#include "thunder_bgx.h"
+
+/* PCI device IDs */
+#define PCI_DEVICE_ID_THUNDER_NIC_PF 0xA01E
+#define PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF 0x0011
+#define PCI_DEVICE_ID_THUNDER_NIC_VF 0xA034
+#define PCI_DEVICE_ID_THUNDER_BGX 0xA026
+
+/* PCI BAR nos */
+#define PCI_CFG_REG_BAR_NUM 0
+#define PCI_MSIX_REG_BAR_NUM 4
+
+/* NIC SRIOV VF count */
+#define MAX_NUM_VFS_SUPPORTED 128
+#define DEFAULT_NUM_VF_ENABLED 8
+
+#define NIC_TNS_BYPASS_MODE 0
+#define NIC_TNS_MODE 1
+
+/* NIC priv flags */
+#define NIC_SRIOV_ENABLED BIT(0)
+
+/* Min/Max packet size */
+#define NIC_HW_MIN_FRS 64
+#define NIC_HW_MAX_FRS 9200 /* 9216 max packet including FCS */
+
+/* Max pkinds */
+#define NIC_MAX_PKIND 16
+
+/* Rx Channels */
+/* Receive channel configuration in TNS bypass mode
+ * Below is configuration in TNS bypass mode
+ * BGX0-LMAC0-CHAN0 - VNIC CHAN0
+ * BGX0-LMAC1-CHAN0 - VNIC CHAN16
+ * ...
+ * BGX1-LMAC0-CHAN0 - VNIC CHAN128
+ * ...
+ * BGX1-LMAC3-CHAN0 - VNIC CHAN174
+ */
+#define NIC_INTF_COUNT 2 /* Interfaces btw VNIC and TNS/BGX */
+#define NIC_CHANS_PER_INF 128
+#define NIC_MAX_CHANS (NIC_INTF_COUNT * NIC_CHANS_PER_INF)
+#define NIC_CPI_COUNT 2048 /* No of channel parse indices */
+
+/* TNS bypass mode: 1-1 mapping between VNIC and BGX:LMAC */
+#define NIC_MAX_BGX MAX_BGX_PER_CN88XX
+#define NIC_CPI_PER_BGX (NIC_CPI_COUNT / NIC_MAX_BGX)
+#define NIC_MAX_CPI_PER_LMAC 64 /* Max when CPI_ALG is IP diffserv */
+#define NIC_RSSI_PER_BGX (NIC_RSSI_COUNT / NIC_MAX_BGX)
+
+/* Tx scheduling */
+#define NIC_MAX_TL4 1024
+#define NIC_MAX_TL4_SHAPERS 256 /* 1 shaper for 4 TL4s */
+#define NIC_MAX_TL3 256
+#define NIC_MAX_TL3_SHAPERS 64 /* 1 shaper for 4 TL3s */
+#define NIC_MAX_TL2 64
+#define NIC_MAX_TL2_SHAPERS 2 /* 1 shaper for 32 TL2s */
+#define NIC_MAX_TL1 2
+
+/* TNS bypass mode */
+#define NIC_TL2_PER_BGX 32
+#define NIC_TL4_PER_BGX (NIC_MAX_TL4 / NIC_MAX_BGX)
+#define NIC_TL4_PER_LMAC (NIC_MAX_TL4 / NIC_CHANS_PER_INF)
+
+/* NIC VF Interrupts */
+#define NICVF_INTR_CQ 0
+#define NICVF_INTR_SQ 1
+#define NICVF_INTR_RBDR 2
+#define NICVF_INTR_PKT_DROP 3
+#define NICVF_INTR_TCP_TIMER 4
+#define NICVF_INTR_MBOX 5
+#define NICVF_INTR_QS_ERR 6
+
+#define NICVF_INTR_CQ_SHIFT 0
+#define NICVF_INTR_SQ_SHIFT 8
+#define NICVF_INTR_RBDR_SHIFT 16
+#define NICVF_INTR_PKT_DROP_SHIFT 20
+#define NICVF_INTR_TCP_TIMER_SHIFT 21
+#define NICVF_INTR_MBOX_SHIFT 22
+#define NICVF_INTR_QS_ERR_SHIFT 23
+
+#define NICVF_INTR_CQ_MASK (0xFF << NICVF_INTR_CQ_SHIFT)
+#define NICVF_INTR_SQ_MASK (0xFF << NICVF_INTR_SQ_SHIFT)
+#define NICVF_INTR_RBDR_MASK (0x03 << NICVF_INTR_RBDR_SHIFT)
+#define NICVF_INTR_PKT_DROP_MASK BIT(NICVF_INTR_PKT_DROP_SHIFT)
+#define NICVF_INTR_TCP_TIMER_MASK BIT(NICVF_INTR_TCP_TIMER_SHIFT)
+#define NICVF_INTR_MBOX_MASK BIT(NICVF_INTR_MBOX_SHIFT)
+#define NICVF_INTR_QS_ERR_MASK BIT(NICVF_INTR_QS_ERR_SHIFT)
+
+/* MSI-X interrupts */
+#define NIC_PF_MSIX_VECTORS 10
+#define NIC_VF_MSIX_VECTORS 20
+
+#define NIC_PF_INTR_ID_ECC0_SBE 0
+#define NIC_PF_INTR_ID_ECC0_DBE 1
+#define NIC_PF_INTR_ID_ECC1_SBE 2
+#define NIC_PF_INTR_ID_ECC1_DBE 3
+#define NIC_PF_INTR_ID_ECC2_SBE 4
+#define NIC_PF_INTR_ID_ECC2_DBE 5
+#define NIC_PF_INTR_ID_ECC3_SBE 6
+#define NIC_PF_INTR_ID_ECC3_DBE 7
+#define NIC_PF_INTR_ID_MBOX0 8
+#define NIC_PF_INTR_ID_MBOX1 9
+
+/* Global timer for CQ timer thresh interrupts
+ * Calculated for SCLK of 700Mhz
+ * value written should be a 1/16th of what is expected
+ *
+ * 1 tick per 0.05usec = value of 2.2
+ * This 10% would be covered in CQ timer thresh value
+ */
+#define NICPF_CLK_PER_INT_TICK 2
+
+struct nicvf_cq_poll {
+ u8 cq_idx; /* Completion queue index */
+ struct napi_struct napi;
+};
+
+#define NIC_RSSI_COUNT 4096 /* Total no of RSS indices */
+#define NIC_MAX_RSS_HASH_BITS 8
+#define NIC_MAX_RSS_IDR_TBL_SIZE (1 << NIC_MAX_RSS_HASH_BITS)
+#define RSS_HASH_KEY_SIZE 5 /* 320 bit key */
+
+struct nicvf_rss_info {
+ bool enable;
+#define RSS_L2_EXTENDED_HASH_ENA BIT(0)
+#define RSS_IP_HASH_ENA BIT(1)
+#define RSS_TCP_HASH_ENA BIT(2)
+#define RSS_TCP_SYN_DIS BIT(3)
+#define RSS_UDP_HASH_ENA BIT(4)
+#define RSS_L4_EXTENDED_HASH_ENA BIT(5)
+#define RSS_ROCE_ENA BIT(6)
+#define RSS_L3_BI_DIRECTION_ENA BIT(7)
+#define RSS_L4_BI_DIRECTION_ENA BIT(8)
+ u64 cfg;
+ u8 hash_bits;
+ u16 rss_size;
+ u8 ind_tbl[NIC_MAX_RSS_IDR_TBL_SIZE];
+ u64 key[RSS_HASH_KEY_SIZE];
+} ____cacheline_aligned_in_smp;
+
+enum rx_stats_reg_offset {
+ RX_OCTS = 0x0,
+ RX_UCAST = 0x1,
+ RX_BCAST = 0x2,
+ RX_MCAST = 0x3,
+ RX_RED = 0x4,
+ RX_RED_OCTS = 0x5,
+ RX_ORUN = 0x6,
+ RX_ORUN_OCTS = 0x7,
+ RX_FCS = 0x8,
+ RX_L2ERR = 0x9,
+ RX_DRP_BCAST = 0xa,
+ RX_DRP_MCAST = 0xb,
+ RX_DRP_L3BCAST = 0xc,
+ RX_DRP_L3MCAST = 0xd,
+ RX_STATS_ENUM_LAST,
+};
+
+enum tx_stats_reg_offset {
+ TX_OCTS = 0x0,
+ TX_UCAST = 0x1,
+ TX_BCAST = 0x2,
+ TX_MCAST = 0x3,
+ TX_DROP = 0x4,
+ TX_STATS_ENUM_LAST,
+};
+
+struct nicvf_hw_stats {
+ u64 rx_bytes_ok;
+ u64 rx_ucast_frames_ok;
+ u64 rx_bcast_frames_ok;
+ u64 rx_mcast_frames_ok;
+ u64 rx_fcs_errors;
+ u64 rx_l2_errors;
+ u64 rx_drop_red;
+ u64 rx_drop_red_bytes;
+ u64 rx_drop_overrun;
+ u64 rx_drop_overrun_bytes;
+ u64 rx_drop_bcast;
+ u64 rx_drop_mcast;
+ u64 rx_drop_l3_bcast;
+ u64 rx_drop_l3_mcast;
+ u64 tx_bytes_ok;
+ u64 tx_ucast_frames_ok;
+ u64 tx_bcast_frames_ok;
+ u64 tx_mcast_frames_ok;
+ u64 tx_drops;
+};
+
+struct nicvf_drv_stats {
+ /* Rx */
+ u64 rx_frames_ok;
+ u64 rx_frames_64;
+ u64 rx_frames_127;
+ u64 rx_frames_255;
+ u64 rx_frames_511;
+ u64 rx_frames_1023;
+ u64 rx_frames_1518;
+ u64 rx_frames_jumbo;
+ u64 rx_drops;
+ /* Tx */
+ u64 tx_frames_ok;
+ u64 tx_drops;
+ u64 tx_busy;
+ u64 tx_tso;
+};
+
+struct nicvf {
+ struct net_device *netdev;
+ struct pci_dev *pdev;
+ u8 vf_id;
+ u8 node;
+ u8 tns_mode;
+ u16 mtu;
+ struct queue_set *qs;
+ void __iomem *reg_base;
+ bool link_up;
+ u8 duplex;
+ u32 speed;
+ struct page *rb_page;
+ u32 rb_page_offset;
+ bool rb_alloc_fail;
+ bool rb_work_scheduled;
+ struct delayed_work rbdr_work;
+ struct tasklet_struct rbdr_task;
+ struct tasklet_struct qs_err_task;
+ struct tasklet_struct cq_task;
+ struct nicvf_cq_poll *napi[8];
+ struct nicvf_rss_info rss_info;
+ u8 cpi_alg;
+ /* Interrupt coalescing settings */
+ u32 cq_coalesce_usecs;
+
+ u32 msg_enable;
+ struct nicvf_hw_stats stats;
+ struct nicvf_drv_stats drv_stats;
+ struct bgx_stats bgx_stats;
+ struct work_struct reset_task;
+
+ /* MSI-X */
+ bool msix_enabled;
+ u8 num_vec;
+ struct msix_entry msix_entries[NIC_VF_MSIX_VECTORS];
+ char irq_name[NIC_VF_MSIX_VECTORS][20];
+ bool irq_allocated[NIC_VF_MSIX_VECTORS];
+
+ bool pf_ready_to_rcv_msg;
+ bool pf_acked;
+ bool pf_nacked;
+ bool bgx_stats_acked;
+} ____cacheline_aligned_in_smp;
+
+/* PF <--> VF Mailbox communication
+ * Eight 64bit registers are shared between PF and VF.
+ * Separate set for each VF.
+ * Writing '1' into last register mbx7 means end of message.
+ */
+
+/* PF <--> VF mailbox communication */
+#define NIC_PF_VF_MAILBOX_SIZE 2
+#define NIC_MBOX_MSG_TIMEOUT 2000 /* ms */
+
+/* Mailbox message types */
+#define NIC_MBOX_MSG_READY 0x01 /* Is PF ready to rcv msgs */
+#define NIC_MBOX_MSG_ACK 0x02 /* ACK the message received */
+#define NIC_MBOX_MSG_NACK 0x03 /* NACK the message received */
+#define NIC_MBOX_MSG_QS_CFG 0x04 /* Configure Qset */
+#define NIC_MBOX_MSG_RQ_CFG 0x05 /* Configure receive queue */
+#define NIC_MBOX_MSG_SQ_CFG 0x06 /* Configure Send queue */
+#define NIC_MBOX_MSG_RQ_DROP_CFG 0x07 /* Configure receive queue */
+#define NIC_MBOX_MSG_SET_MAC 0x08 /* Add MAC ID to DMAC filter */
+#define NIC_MBOX_MSG_SET_MAX_FRS 0x09 /* Set max frame size */
+#define NIC_MBOX_MSG_CPI_CFG 0x0A /* Config CPI, RSSI */
+#define NIC_MBOX_MSG_RSS_SIZE 0x0B /* Get RSS indir_tbl size */
+#define NIC_MBOX_MSG_RSS_CFG 0x0C /* Config RSS table */
+#define NIC_MBOX_MSG_RSS_CFG_CONT 0x0D /* RSS config continuation */
+#define NIC_MBOX_MSG_RQ_BP_CFG 0x0E /* RQ backpressure config */
+#define NIC_MBOX_MSG_RQ_SW_SYNC 0x0F /* Flush inflight pkts to RQ */
+#define NIC_MBOX_MSG_BGX_STATS 0x10 /* Get stats from BGX */
+#define NIC_MBOX_MSG_BGX_LINK_CHANGE 0x11 /* BGX:LMAC link status */
+#define NIC_MBOX_MSG_CFG_DONE 0x12 /* VF configuration done */
+#define NIC_MBOX_MSG_SHUTDOWN 0x13 /* VF is being shutdown */
+
+struct nic_cfg_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 tns_mode;
+ u8 node_id;
+ u64 mac_addr;
+};
+
+/* Qset configuration */
+struct qs_cfg_msg {
+ u8 msg;
+ u8 num;
+ u64 cfg;
+};
+
+/* Receive queue configuration */
+struct rq_cfg_msg {
+ u8 msg;
+ u8 qs_num;
+ u8 rq_num;
+ u64 cfg;
+};
+
+/* Send queue configuration */
+struct sq_cfg_msg {
+ u8 msg;
+ u8 qs_num;
+ u8 sq_num;
+ u64 cfg;
+};
+
+/* Set VF's MAC address */
+struct set_mac_msg {
+ u8 msg;
+ u8 vf_id;
+ u64 addr;
+};
+
+/* Set Maximum frame size */
+struct set_frs_msg {
+ u8 msg;
+ u8 vf_id;
+ u16 max_frs;
+};
+
+/* Set CPI algorithm type */
+struct cpi_cfg_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 rq_cnt;
+ u8 cpi_alg;
+};
+
+/* Get RSS table size */
+struct rss_sz_msg {
+ u8 msg;
+ u8 vf_id;
+ u16 ind_tbl_size;
+};
+
+/* Set RSS configuration */
+struct rss_cfg_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 hash_bits;
+ u8 tbl_len;
+ u8 tbl_offset;
+#define RSS_IND_TBL_LEN_PER_MBX_MSG 8
+ u8 ind_tbl[RSS_IND_TBL_LEN_PER_MBX_MSG];
+};
+
+struct bgx_stats_msg {
+ u8 msg;
+ u8 vf_id;
+ u8 rx;
+ u8 idx;
+ u64 stats;
+};
+
+/* Physical interface link status */
+struct bgx_link_status {
+ u8 msg;
+ u8 link_up;
+ u8 duplex;
+ u32 speed;
+};
+
+/* 128 bit shared memory between PF and each VF */
+union nic_mbx {
+ struct { u8 msg; } msg;
+ struct nic_cfg_msg nic_cfg;
+ struct qs_cfg_msg qs;
+ struct rq_cfg_msg rq;
+ struct sq_cfg_msg sq;
+ struct set_mac_msg mac;
+ struct set_frs_msg frs;
+ struct cpi_cfg_msg cpi_cfg;
+ struct rss_sz_msg rss_size;
+ struct rss_cfg_msg rss_cfg;
+ struct bgx_stats_msg bgx_stats;
+ struct bgx_link_status link_status;
+};
+
+int nicvf_set_real_num_queues(struct net_device *netdev,
+ int tx_queues, int rx_queues);
+int nicvf_open(struct net_device *netdev);
+int nicvf_stop(struct net_device *netdev);
+int nicvf_send_msg_to_pf(struct nicvf *vf, union nic_mbx *mbx);
+void nicvf_config_cpi(struct nicvf *nic);
+void nicvf_config_rss(struct nicvf *nic);
+void nicvf_set_rss_key(struct nicvf *nic);
+void nicvf_free_skb(struct nicvf *nic, struct sk_buff *skb);
+void nicvf_set_ethtool_ops(struct net_device *netdev);
+void nicvf_update_stats(struct nicvf *nic);
+void nicvf_update_lmac_stats(struct nicvf *nic);
+
+#endif /* NIC_H */
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
new file mode 100644
index 000000000000..0f1f58b54bf1
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -0,0 +1,940 @@
+/*
+ * Copyright (C) 2015 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/etherdevice.h>
+#include <linux/of.h>
+
+#include "nic_reg.h"
+#include "nic.h"
+#include "q_struct.h"
+#include "thunder_bgx.h"
+
+#define DRV_NAME "thunder-nic"
+#define DRV_VERSION "1.0"
+
+struct nicpf {
+ struct pci_dev *pdev;
+ u8 rev_id;
+#define NIC_NODE_ID_MASK 0x300000000000
+#define NIC_NODE_ID(x) ((x & NODE_ID_MASK) >> 44)
+ u8 node;
+ unsigned int flags;
+ u8 num_vf_en; /* No of VF enabled */
+ bool vf_enabled[MAX_NUM_VFS_SUPPORTED];
+ void __iomem *reg_base; /* Register start address */
+ struct pkind_cfg pkind;
+#define NIC_SET_VF_LMAC_MAP(bgx, lmac) (((bgx & 0xF) << 4) | (lmac & 0xF))
+#define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF)
+#define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF)
+ u8 vf_lmac_map[MAX_LMAC];
+ struct delayed_work dwork;
+ struct workqueue_struct *check_link;
+ u8 link[MAX_LMAC];
+ u8 duplex[MAX_LMAC];
+ u32 speed[MAX_LMAC];
+ u16 cpi_base[MAX_NUM_VFS_SUPPORTED];
+ u16 rss_ind_tbl_size;
+ bool mbx_lock[MAX_NUM_VFS_SUPPORTED];
+
+ /* MSI-X */
+ bool msix_enabled;
+ u8 num_vec;
+ struct msix_entry msix_entries[NIC_PF_MSIX_VECTORS];
+ bool irq_allocated[NIC_PF_MSIX_VECTORS];
+};
+
+/* Supported devices */
+static const struct pci_device_id nic_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_PF) },
+ { 0, } /* end of table */
+};
+
+MODULE_AUTHOR("Sunil Goutham");
+MODULE_DESCRIPTION("Cavium Thunder NIC Physical Function Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, nic_id_table);
+
+/* The Cavium ThunderX network controller can *only* be found in SoCs
+ * containing the ThunderX ARM64 CPU implementation. All accesses to the device
+ * registers on this platform are implicitly strongly ordered with respect
+ * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
+ * with no memory barriers in this driver. The readq()/writeq() functions add
+ * explicit ordering operation which in this case are redundant, and only
+ * add overhead.
+ */
+
+/* Register read/write APIs */
+static void nic_reg_write(struct nicpf *nic, u64 offset, u64 val)
+{
+ writeq_relaxed(val, nic->reg_base + offset);
+}
+
+static u64 nic_reg_read(struct nicpf *nic, u64 offset)
+{
+ return readq_relaxed(nic->reg_base + offset);
+}
+
+/* PF -> VF mailbox communication APIs */
+static void nic_enable_mbx_intr(struct nicpf *nic)
+{
+ /* Enable mailbox interrupt for all 128 VFs */
+ nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, ~0ull);
+ nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), ~0ull);
+}
+
+static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg)
+{
+ nic_reg_write(nic, NIC_PF_MAILBOX_INT + (mbx_reg << 3), BIT_ULL(vf));
+}
+
+static u64 nic_get_mbx_addr(int vf)
+{
+ return NIC_PF_VF_0_127_MAILBOX_0_1 + (vf << NIC_VF_NUM_SHIFT);
+}
+
+/* Send a mailbox message to VF
+ * @vf: vf to which this message to be sent
+ * @mbx: Message to be sent
+ */
+static void nic_send_msg_to_vf(struct nicpf *nic, int vf, union nic_mbx *mbx)
+{
+ void __iomem *mbx_addr = nic->reg_base + nic_get_mbx_addr(vf);
+ u64 *msg = (u64 *)mbx;
+
+ /* In first revision HW, mbox interrupt is triggerred
+ * when PF writes to MBOX(1), in next revisions when
+ * PF writes to MBOX(0)
+ */
+ if (nic->rev_id == 0) {
+ /* see the comment for nic_reg_write()/nic_reg_read()
+ * functions above
+ */
+ writeq_relaxed(msg[0], mbx_addr);
+ writeq_relaxed(msg[1], mbx_addr + 8);
+ } else {
+ writeq_relaxed(msg[1], mbx_addr + 8);
+ writeq_relaxed(msg[0], mbx_addr);
+ }
+}
+
+/* Responds to VF's READY message with VF's
+ * ID, node, MAC address e.t.c
+ * @vf: VF which sent READY message
+ */
+static void nic_mbx_send_ready(struct nicpf *nic, int vf)
+{
+ union nic_mbx mbx = {};
+ int bgx_idx, lmac;
+ const char *mac;
+
+ mbx.nic_cfg.msg = NIC_MBOX_MSG_READY;
+ mbx.nic_cfg.vf_id = vf;
+
+ mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE;
+
+ bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+ mac = bgx_get_lmac_mac(nic->node, bgx_idx, lmac);
+ if (mac)
+ ether_addr_copy((u8 *)&mbx.nic_cfg.mac_addr, mac);
+
+ mbx.nic_cfg.node_id = nic->node;
+ nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
+/* ACKs VF's mailbox message
+ * @vf: VF to which ACK to be sent
+ */
+static void nic_mbx_send_ack(struct nicpf *nic, int vf)
+{
+ union nic_mbx mbx = {};
+
+ mbx.msg.msg = NIC_MBOX_MSG_ACK;
+ nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
+/* NACKs VF's mailbox message that PF is not able to
+ * complete the action
+ * @vf: VF to which ACK to be sent
+ */
+static void nic_mbx_send_nack(struct nicpf *nic, int vf)
+{
+ union nic_mbx mbx = {};
+
+ mbx.msg.msg = NIC_MBOX_MSG_NACK;
+ nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
+/* Flush all in flight receive packets to memory and
+ * bring down an active RQ
+ */
+static int nic_rcv_queue_sw_sync(struct nicpf *nic)
+{
+ u16 timeout = ~0x00;
+
+ nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x01);
+ /* Wait till sync cycle is finished */
+ while (timeout) {
+ if (nic_reg_read(nic, NIC_PF_SW_SYNC_RX_DONE) & 0x1)
+ break;
+ timeout--;
+ }
+ nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x00);
+ if (!timeout) {
+ dev_err(&nic->pdev->dev, "Receive queue software sync failed");
+ return 1;
+ }
+ return 0;
+}
+
+/* Get BGX Rx/Tx stats and respond to VF's request */
+static void nic_get_bgx_stats(struct nicpf *nic, struct bgx_stats_msg *bgx)
+{
+ int bgx_idx, lmac;
+ union nic_mbx mbx = {};
+
+ bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]);
+
+ mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
+ mbx.bgx_stats.vf_id = bgx->vf_id;
+ mbx.bgx_stats.rx = bgx->rx;
+ mbx.bgx_stats.idx = bgx->idx;
+ if (bgx->rx)
+ mbx.bgx_stats.stats = bgx_get_rx_stats(nic->node, bgx_idx,
+ lmac, bgx->idx);
+ else
+ mbx.bgx_stats.stats = bgx_get_tx_stats(nic->node, bgx_idx,
+ lmac, bgx->idx);
+ nic_send_msg_to_vf(nic, bgx->vf_id, &mbx);
+}
+
+/* Update hardware min/max frame size */
+static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf)
+{
+ if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS)) {
+ dev_err(&nic->pdev->dev,
+ "Invalid MTU setting from VF%d rejected, should be between %d and %d\n",
+ vf, NIC_HW_MIN_FRS, NIC_HW_MAX_FRS);
+ return 1;
+ }
+ new_frs += ETH_HLEN;
+ if (new_frs <= nic->pkind.maxlen)
+ return 0;
+
+ nic->pkind.maxlen = new_frs;
+ nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG, *(u64 *)&nic->pkind);
+ return 0;
+}
+
+/* Set minimum transmit packet size */
+static void nic_set_tx_pkt_pad(struct nicpf *nic, int size)
+{
+ int lmac;
+ u64 lmac_cfg;
+
+ /* Max value that can be set is 60 */
+ if (size > 60)
+ size = 60;
+
+ for (lmac = 0; lmac < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); lmac++) {
+ lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3));
+ lmac_cfg &= ~(0xF << 2);
+ lmac_cfg |= ((size / 4) << 2);
+ nic_reg_write(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3), lmac_cfg);
+ }
+}
+
+/* Function to check number of LMACs present and set VF::LMAC mapping.
+ * Mapping will be used while initializing channels.
+ */
+static void nic_set_lmac_vf_mapping(struct nicpf *nic)
+{
+ unsigned bgx_map = bgx_get_map(nic->node);
+ int bgx, next_bgx_lmac = 0;
+ int lmac, lmac_cnt = 0;
+ u64 lmac_credit;
+
+ nic->num_vf_en = 0;
+
+ for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) {
+ if (!(bgx_map & (1 << bgx)))
+ continue;
+ lmac_cnt = bgx_get_lmac_count(nic->node, bgx);
+ for (lmac = 0; lmac < lmac_cnt; lmac++)
+ nic->vf_lmac_map[next_bgx_lmac++] =
+ NIC_SET_VF_LMAC_MAP(bgx, lmac);
+ nic->num_vf_en += lmac_cnt;
+
+ /* Program LMAC credits */
+ lmac_credit = (1ull << 1); /* channel credit enable */
+ lmac_credit |= (0x1ff << 2); /* Max outstanding pkt count */
+ /* 48KB BGX Tx buffer size, each unit is of size 16bytes */
+ lmac_credit |= (((((48 * 1024) / lmac_cnt) -
+ NIC_HW_MAX_FRS) / 16) << 12);
+ lmac = bgx * MAX_LMAC_PER_BGX;
+ for (; lmac < lmac_cnt + (bgx * MAX_LMAC_PER_BGX); lmac++)
+ nic_reg_write(nic,
+ NIC_PF_LMAC_0_7_CREDIT + (lmac * 8),
+ lmac_credit);
+ }
+}
+
+#define BGX0_BLOCK 8
+#define BGX1_BLOCK 9
+
+static void nic_init_hw(struct nicpf *nic)
+{
+ int i;
+
+ /* Reset NIC, in case the driver is repeatedly inserted and removed */
+ nic_reg_write(nic, NIC_PF_SOFT_RESET, 1);
+
+ /* Enable NIC HW block */
+ nic_reg_write(nic, NIC_PF_CFG, 0x3);
+
+ /* Enable backpressure */
+ nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03);
+
+ /* Disable TNS mode on both interfaces */
+ nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
+ (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK);
+ nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
+ (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK);
+ nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG,
+ (1ULL << 63) | BGX0_BLOCK);
+ nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8),
+ (1ULL << 63) | BGX1_BLOCK);
+
+ /* PKIND configuration */
+ nic->pkind.minlen = 0;
+ nic->pkind.maxlen = NIC_HW_MAX_FRS + ETH_HLEN;
+ nic->pkind.lenerr_en = 1;
+ nic->pkind.rx_hdr = 0;
+ nic->pkind.hdr_sl = 0;
+
+ for (i = 0; i < NIC_MAX_PKIND; i++)
+ nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (i << 3),
+ *(u64 *)&nic->pkind);
+
+ nic_set_tx_pkt_pad(nic, NIC_HW_MIN_FRS);
+
+ /* Timer config */
+ nic_reg_write(nic, NIC_PF_INTR_TIMER_CFG, NICPF_CLK_PER_INT_TICK);
+}
+
+/* Channel parse index configuration */
+static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg)
+{
+ u32 vnic, bgx, lmac, chan;
+ u32 padd, cpi_count = 0;
+ u64 cpi_base, cpi, rssi_base, rssi;
+ u8 qset, rq_idx = 0;
+
+ vnic = cfg->vf_id;
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
+
+ chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
+ cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + (bgx * NIC_CPI_PER_BGX);
+ rssi_base = (lmac * nic->rss_ind_tbl_size) + (bgx * NIC_RSSI_PER_BGX);
+
+ /* Rx channel configuration */
+ nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3),
+ (1ull << 63) | (vnic << 0));
+ nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_CFG | (chan << 3),
+ ((u64)cfg->cpi_alg << 62) | (cpi_base << 48));
+
+ if (cfg->cpi_alg == CPI_ALG_NONE)
+ cpi_count = 1;
+ else if (cfg->cpi_alg == CPI_ALG_VLAN) /* 3 bits of PCP */
+ cpi_count = 8;
+ else if (cfg->cpi_alg == CPI_ALG_VLAN16) /* 3 bits PCP + DEI */
+ cpi_count = 16;
+ else if (cfg->cpi_alg == CPI_ALG_DIFF) /* 6bits DSCP */
+ cpi_count = NIC_MAX_CPI_PER_LMAC;
+
+ /* RSS Qset, Qidx mapping */
+ qset = cfg->vf_id;
+ rssi = rssi_base;
+ for (; rssi < (rssi_base + cfg->rq_cnt); rssi++) {
+ nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3),
+ (qset << 3) | rq_idx);
+ rq_idx++;
+ }
+
+ rssi = 0;
+ cpi = cpi_base;
+ for (; cpi < (cpi_base + cpi_count); cpi++) {
+ /* Determine port to channel adder */
+ if (cfg->cpi_alg != CPI_ALG_DIFF)
+ padd = cpi % cpi_count;
+ else
+ padd = cpi % 8; /* 3 bits CS out of 6bits DSCP */
+
+ /* Leave RSS_SIZE as '0' to disable RSS */
+ nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3),
+ (vnic << 24) | (padd << 16) | (rssi_base + rssi));
+
+ if ((rssi + 1) >= cfg->rq_cnt)
+ continue;
+
+ if (cfg->cpi_alg == CPI_ALG_VLAN)
+ rssi++;
+ else if (cfg->cpi_alg == CPI_ALG_VLAN16)
+ rssi = ((cpi - cpi_base) & 0xe) >> 1;
+ else if (cfg->cpi_alg == CPI_ALG_DIFF)
+ rssi = ((cpi - cpi_base) & 0x38) >> 3;
+ }
+ nic->cpi_base[cfg->vf_id] = cpi_base;
+}
+
+/* Responsds to VF with its RSS indirection table size */
+static void nic_send_rss_size(struct nicpf *nic, int vf)
+{
+ union nic_mbx mbx = {};
+ u64 *msg;
+
+ msg = (u64 *)&mbx;
+
+ mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
+ mbx.rss_size.ind_tbl_size = nic->rss_ind_tbl_size;
+ nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
+/* Receive side scaling configuration
+ * configure:
+ * - RSS index
+ * - indir table i.e hash::RQ mapping
+ * - no of hash bits to consider
+ */
+static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
+{
+ u8 qset, idx = 0;
+ u64 cpi_cfg, cpi_base, rssi_base, rssi;
+
+ cpi_base = nic->cpi_base[cfg->vf_id];
+ cpi_cfg = nic_reg_read(nic, NIC_PF_CPI_0_2047_CFG | (cpi_base << 3));
+ rssi_base = (cpi_cfg & 0x0FFF) + cfg->tbl_offset;
+
+ rssi = rssi_base;
+ qset = cfg->vf_id;
+
+ for (; rssi < (rssi_base + cfg->tbl_len); rssi++) {
+ nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3),
+ (qset << 3) | (cfg->ind_tbl[idx] & 0x7));
+ idx++;
+ }
+
+ cpi_cfg &= ~(0xFULL << 20);
+ cpi_cfg |= (cfg->hash_bits << 20);
+ nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi_base << 3), cpi_cfg);
+}
+
+/* 4 level transmit side scheduler configutation
+ * for TNS bypass mode
+ *
+ * Sample configuration for SQ0
+ * VNIC0-SQ0 -> TL4(0) -> TL3[0] -> TL2[0] -> TL1[0] -> BGX0
+ * VNIC1-SQ0 -> TL4(8) -> TL3[2] -> TL2[0] -> TL1[0] -> BGX0
+ * VNIC2-SQ0 -> TL4(16) -> TL3[4] -> TL2[1] -> TL1[0] -> BGX0
+ * VNIC3-SQ0 -> TL4(24) -> TL3[6] -> TL2[1] -> TL1[0] -> BGX0
+ * VNIC4-SQ0 -> TL4(512) -> TL3[128] -> TL2[32] -> TL1[1] -> BGX1
+ * VNIC5-SQ0 -> TL4(520) -> TL3[130] -> TL2[32] -> TL1[1] -> BGX1
+ * VNIC6-SQ0 -> TL4(528) -> TL3[132] -> TL2[33] -> TL1[1] -> BGX1
+ * VNIC7-SQ0 -> TL4(536) -> TL3[134] -> TL2[33] -> TL1[1] -> BGX1
+ */
+static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, u8 sq_idx)
+{
+ u32 bgx, lmac, chan;
+ u32 tl2, tl3, tl4;
+ u32 rr_quantum;
+
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
+ /* 24 bytes for FCS, IPG and preamble */
+ rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);
+
+ tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
+ tl4 += sq_idx;
+ tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3);
+ nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
+ ((u64)vnic << NIC_QS_ID_SHIFT) |
+ ((u32)sq_idx << NIC_Q_NUM_SHIFT), tl4);
+ nic_reg_write(nic, NIC_PF_TL4_0_1023_CFG | (tl4 << 3),
+ ((u64)vnic << 27) | ((u32)sq_idx << 24) | rr_quantum);
+
+ nic_reg_write(nic, NIC_PF_TL3_0_255_CFG | (tl3 << 3), rr_quantum);
+ chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
+ nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan);
+ /* Enable backpressure on the channel */
+ nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1);<