summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hfi1/hfi.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/hfi1/hfi.h')
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h1950
1 files changed, 1950 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
new file mode 100644
index 000000000000..4417a0fd3ef9
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -0,0 +1,1950 @@
+#ifndef _HFI1_KERNEL_H
+#define _HFI1_KERNEL_H
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/fs.h>
+#include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/sched.h>
+#include <linux/cdev.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <rdma/rdma_vt.h>
+
+#include "chip_registers.h"
+#include "common.h"
+#include "verbs.h"
+#include "pio.h"
+#include "chip.h"
+#include "mad.h"
+#include "qsfp.h"
+#include "platform.h"
+#include "affinity.h"
+
+/* bumped 1 from s/w major version of TrueScale */
+#define HFI1_CHIP_VERS_MAJ 3U
+
+/* don't care about this except printing */
+#define HFI1_CHIP_VERS_MIN 0U
+
+/* Organizationally Unique Identifier (Mfg code) and its LSB position in GUID */
+#define HFI1_OUI 0x001175
+#define HFI1_OUI_LSB 40
+
+#define DROP_PACKET_OFF 0
+#define DROP_PACKET_ON 1
+
+extern unsigned long hfi1_cap_mask; /* bitmask read by the HFI1_CAP_* macros below */
+#define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap) /* unshifted mask */
+#define HFI1_CAP_UGET_MASK(mask, cap) \
+ (((mask) >> HFI1_CAP_USER_SHIFT) & HFI1_CAP_##cap) /* mask shifted first */
+#define HFI1_CAP_KGET(cap) (HFI1_CAP_KGET_MASK(hfi1_cap_mask, cap))
+#define HFI1_CAP_UGET(cap) (HFI1_CAP_UGET_MASK(hfi1_cap_mask, cap))
+#define HFI1_CAP_IS_KSET(cap) (!!HFI1_CAP_KGET(cap)) /* normalized to 0 or 1 */
+#define HFI1_CAP_IS_USET(cap) (!!HFI1_CAP_UGET(cap)) /* normalized to 0 or 1 */
+#define HFI1_MISC_GET() ((hfi1_cap_mask >> HFI1_CAP_MISC_SHIFT) & \
+ HFI1_CAP_MISC_MASK)
+/* Offline Disabled Reason is 4-bits */
+#define HFI1_ODR_MASK(rsn) ((rsn) & OPA_PI_MASK_OFFLINE_REASON)
+
+/*
+ * Control context is always 0 and handles the error packets.
+ * It also handles the VL15 and multicast packets.
+ */
+#define HFI1_CTRL_CTXT 0
+
+/*
+ * Number of software counters the driver keeps for each error status
+ * register, one counter per event reported by that register.
+ */
+#define NUM_CCE_ERR_STATUS_COUNTERS 41
+#define NUM_RCV_ERR_STATUS_COUNTERS 64
+#define NUM_MISC_ERR_STATUS_COUNTERS 13
+#define NUM_SEND_PIO_ERR_STATUS_COUNTERS 36
+#define NUM_SEND_DMA_ERR_STATUS_COUNTERS 4
+#define NUM_SEND_EGRESS_ERR_STATUS_COUNTERS 64
+#define NUM_SEND_ERR_STATUS_COUNTERS 3
+#define NUM_SEND_CTXT_ERR_STATUS_COUNTERS 5
+#define NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS 24
+
+/*
+ * Per-driver stats: values that are either not specific to a device or
+ * port, or are summed over all of the devices and ports.
+ * They are described by name via ipathfs filesystem, so layout
+ * and number of elements can change without breaking compatibility.
+ * If members are added or deleted hfi1_statnames[] in debugfs.c must
+ * change to match.
+ */
+struct hfi1_ib_stats {
+ __u64 sps_ints; /* number of interrupts handled */
+ __u64 sps_errints; /* number of error interrupts */
+ __u64 sps_txerrs; /* tx-related packet errors */
+ __u64 sps_rcverrs; /* non-crc rcv packet errors */
+ __u64 sps_hwerrs; /* hardware errors reported (parity, etc.) */
+ __u64 sps_nopiobufs; /* no pio bufs avail from kernel */
+ __u64 sps_ctxts; /* number of contexts currently open */
+ __u64 sps_lenerrs; /* number of kernel packets where RHF != LRH len */
+ __u64 sps_buffull; /* NOTE(review): undocumented; presumably buffer-full events */
+ __u64 sps_hdrfull; /* NOTE(review): undocumented; presumably hdrq-full events */
+};
+
+extern struct hfi1_ib_stats hfi1_stats; /* driver-wide counters, see struct hfi1_ib_stats */
+extern const struct pci_error_handlers hfi1_pci_err_handler;
+
+/*
+ * First-cut criterion for "device is active" is
+ * two thousand dwords combined Tx, Rx traffic per
+ * 5-second interval. SMA packets are 64 dwords,
+ * and occur "a few per second", presumably each way.
+ */
+#define HFI1_TRAFFIC_ACTIVE_THRESHOLD (2000) /* dwords per 5-second interval */
+
+/*
+ * The definitions below hold all data related to a single context (formerly called port).
+ */
+
+#ifdef CONFIG_DEBUG_FS
+struct hfi1_opcode_stats_perctx; /* pointed to by hfi1_ctxtdata.opstats */
+#endif
+
+struct ctxt_eager_bufs { /* eager buffer bookkeeping; embedded in hfi1_ctxtdata as egrbufs */
+ ssize_t size; /* total size of eager buffers */
+ u32 count; /* size of buffers array */
+ u32 numbufs; /* number of buffers allocated */
+ u32 alloced; /* number of rcvarray entries used */
+ u32 rcvtid_size; /* size of each eager rcv tid */
+ u32 threshold; /* head update threshold */
+ struct eager_buffer {
+ void *addr;
+ dma_addr_t phys;
+ ssize_t len;
+ } *buffers; /* array whose size is kept in .count */
+ struct {
+ void *addr;
+ dma_addr_t phys;
+ } *rcvtids; /* NOTE(review): array length not stated here; possibly .alloced */
+};
+
+struct exp_tid_set { /* type of the tid_*_list members of hfi1_ctxtdata */
+ struct list_head list;
+ u32 count; /* NOTE(review): presumably the number of entries on list; confirm */
+};
+
+struct hfi1_ctxtdata { /* all state for one receive context */
+ /* shadow the ctxt's RcvCtrl register */
+ u64 rcvctrl;
+ /* rcvhdrq base, needs mmap before useful */
+ void *rcvhdrq;
+ /* kernel virtual address where hdrqtail is updated */
+ volatile __le64 *rcvhdrtail_kvaddr;
+ /*
+ * Shared page for kernel to signal user processes that send buffers
+ * need disarming. The process should call HFI1_CMD_DISARM_BUFS
+ * or HFI1_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set.
+ */
+ unsigned long *user_event_mask;
+ /* when waiting for rcv or pioavail */
+ wait_queue_head_t wait;
+ /* rcvhdrq size (for freeing) */
+ size_t rcvhdrq_size;
+ /* number of rcvhdrq entries */
+ u16 rcvhdrq_cnt;
+ /* size of each of the rcvhdrq entries */
+ u16 rcvhdrqentsize;
+ /* mmap of hdrq, must fit in 44 bits */
+ dma_addr_t rcvhdrq_phys;
+ dma_addr_t rcvhdrqtailaddr_phys;
+ struct ctxt_eager_bufs egrbufs;
+ /* this receive context's assigned PIO ACK send context */
+ struct send_context *sc;
+
+ /* dynamic receive available interrupt timeout */
+ u32 rcvavail_timeout;
+ /*
+ * number of opens (including slave sub-contexts) on this instance
+ * (ignoring forks, dup, etc. for now)
+ */
+ int cnt;
+ /*
+ * NOTE(review): "how much space to leave at start of eager TID entries
+ * for protocol use, on each TID" - no member follows this text; stale?
+ */
+ /* context number, kept here instead of calculating it (presumably) */
+ unsigned ctxt;
+ /* non-zero if ctxt is being shared. */
+ u16 subctxt_cnt;
+ /* NOTE(review): comment was a copy of subctxt_cnt's; presumably the sub-context id */
+ u16 subctxt_id;
+ u8 uuid[16];
+ /* job key */
+ u16 jkey;
+ /* number of RcvArray groups for this context. */
+ u32 rcv_array_groups;
+ /* index of first eager TID entry. */
+ u32 eager_base;
+ /* number of expected TID entries */
+ u32 expected_count;
+ /* index of first expected TID entry. */
+ u32 expected_base;
+
+ struct exp_tid_set tid_group_list;
+ struct exp_tid_set tid_used_list;
+ struct exp_tid_set tid_full_list;
+
+ /* lock protecting all Expected TID data */
+ struct mutex exp_lock;
+ /* number of pio bufs for this ctxt (all procs, if shared) */
+ u32 piocnt;
+ /* first pio buffer for this ctxt */
+ u32 pio_base;
+ /* chip offset of PIO buffers for this ctxt */
+ u32 piobufs;
+ /* per-context configuration flags */
+ u32 flags;
+ /* per-context event flags for fileops/intr communication */
+ unsigned long event_flags;
+ /* WAIT_RCV that timed out, no interrupt */
+ u32 rcvwait_to;
+ /* WAIT_PIO that timed out, no interrupt */
+ u32 piowait_to;
+ /* WAIT_RCV already happened, no wait */
+ u32 rcvnowait;
+ /* WAIT_PIO already happened, no wait */
+ u32 pionowait;
+ /* total number of polled urgent packets */
+ u32 urgent;
+ /* saved total number of polled urgent packets for poll edge trigger */
+ u32 urgent_poll;
+ /* pid of process using this ctxt */
+ pid_t pid;
+ pid_t subpid[HFI1_MAX_SHARED_CTXTS]; /* HFI1_MAX_SHARED_CTXTS pid slots */
+ /* same size as task_struct .comm[], command that opened context */
+ char comm[TASK_COMM_LEN];
+ /* so file ops can get at unit */
+ struct hfi1_devdata *dd;
+ /* so functions that need physical port can get it easily */
+ struct hfi1_pportdata *ppd;
+ /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
+ void *subctxt_uregbase;
+ /* An array of pages for the eager receive buffers * N */
+ void *subctxt_rcvegrbuf;
+ /* An array of pages for the eager header queue entries * N */
+ void *subctxt_rcvhdr_base;
+ /* The version of the library which opened this ctxt */
+ u32 userversion;
+ /* Bitmask of active slaves */
+ u32 active_slaves;
+ /* Type of packets or conditions we want to poll for */
+ u16 poll_type;
+ /* receive packet sequence counter */
+ u8 seq_cnt;
+ u8 redirect_seq_cnt;
+ /* ctxt rcvhdrq head offset */
+ u32 head;
+ u32 pkt_count;
+ /* QPs waiting for context processing */
+ struct list_head qp_wait_list;
+ /* interrupt handling */
+ u64 imask; /* clear interrupt mask */
+ int ireg; /* clear interrupt register */
+ unsigned numa_id; /* numa node of this context */
+ /* verbs stats per CTX */
+ struct hfi1_opcode_stats_perctx *opstats;
+ /*
+ * This is the kernel thread that will keep making
+ * progress on the user sdma requests behind the scenes.
+ * There is one per context (shared contexts use the master's).
+ */
+ struct task_struct *progress;
+ struct list_head sdma_queues;
+ /* protect sdma queues */
+ spinlock_t sdma_qlock;
+
+ /* Is ASPM interrupt supported for this context */
+ bool aspm_intr_supported;
+ /* ASPM state (enabled/disabled) for this context */
+ bool aspm_enabled;
+ /* Timer for re-enabling ASPM if interrupt activity quietens down */
+ struct timer_list aspm_timer;
+ /* Lock to serialize between intr, timer intr and user threads */
+ spinlock_t aspm_lock;
+ /* Is ASPM processing enabled for this context (in intr context) */
+ bool aspm_intr_enable;
+ /* Last interrupt timestamp */
+ ktime_t aspm_ts_last_intr;
+ /* Last timestamp at which we scheduled a timer for this context */
+ ktime_t aspm_ts_timer_sched;
+
+ /*
+ * The interrupt handler for a particular receive context can vary
+ * throughout its lifetime. This is not a lock protected data member so
+ * it must be updated atomically and the prev and new value must always
+ * be valid. Worst case is we process an extra interrupt and up to 64
+ * packets with the wrong interrupt handler.
+ */
+ int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+};
+
+/*
+ * Represents a single packet at a high level. Put commonly computed things in
+ * here so we do not have to keep doing them over and over. The rule of thumb is
+ * if something is used one time to derive some value, store that something in
+ * here. If it is used multiple times, then store the result of that derivation
+ * in here.
+ */
+struct hfi1_packet {
+ void *ebuf;
+ void *hdr;
+ struct hfi1_ctxtdata *rcd; /* context data associated with this packet */
+ __le32 *rhf_addr;
+ struct rvt_qp *qp;
+ struct hfi1_other_headers *ohdr;
+ u64 rhf; /* value handed to rhf_dc_info() by has_sc4_bit() */
+ u32 maxcnt;
+ u32 rhqoff;
+ u32 hdrqtail;
+ int numpkt;
+ u16 tlen;
+ u16 hlen;
+ s16 etail;
+ u16 rsize;
+ u8 updegr;
+ u8 rcv_flags;
+ u8 etype;
+};
+
+static inline bool has_sc4_bit(struct hfi1_packet *p)
+{
+	return rhf_dc_info(p->rhf) != 0; /* true when rhf_dc_info() of the RHF is non-zero */
+}
+
+/*
+ * Private data for snoop/capture support.
+ */
+struct hfi1_snoop_data {
+ int mode_flag; /* see the HFI1_PORT_*_MODE values */
+ struct cdev cdev;
+ struct device *class_dev;
+ /* protect snoop data */
+ spinlock_t snoop_lock;
+ struct list_head queue;
+ wait_queue_head_t waitq;
+ void *filter_value; /* NOTE(review): presumably the value argument of filter_callback */
+ int (*filter_callback)(void *hdr, void *data, void *value);
+ u64 dcc_cfg; /* saved value of DCC Cfg register */
+};
+
+/* values for hfi1_snoop_data.mode_flag */
+#define HFI1_PORT_SNOOP_MODE 1U
+#define HFI1_PORT_CAPTURE_MODE 2U
+
+struct rvt_sge_state; /* forward declaration only */
+
+/*
+ * Get/Set IB link-level config parameters for f_get/set_ib_cfg()
+ * Mostly for MADs that set or query link parameters, also ipath
+ * config interfaces
+ */
+#define HFI1_IB_CFG_LIDLMC 0 /* LID (LS16b) and Mask (MS16b) */
+#define HFI1_IB_CFG_LWID_DG_ENB 1 /* allowed Link-width downgrade */
+#define HFI1_IB_CFG_LWID_ENB 2 /* allowed Link-width */
+#define HFI1_IB_CFG_LWID 3 /* currently active Link-width */
+#define HFI1_IB_CFG_SPD_ENB 4 /* allowed Link speeds */
+#define HFI1_IB_CFG_SPD 5 /* current Link spd */
+#define HFI1_IB_CFG_RXPOL_ENB 6 /* Auto-RX-polarity enable */
+#define HFI1_IB_CFG_LREV_ENB 7 /* Auto-Lane-reversal enable */
+#define HFI1_IB_CFG_LINKLATENCY 8 /* Link Latency (IB1.2 only) */
+#define HFI1_IB_CFG_HRTBT 9 /* IB heartbeat off/enable/auto; DDR/QDR only */
+#define HFI1_IB_CFG_OP_VLS 10 /* operational VLs */
+#define HFI1_IB_CFG_VL_HIGH_CAP 11 /* num of VL high priority weights */
+#define HFI1_IB_CFG_VL_LOW_CAP 12 /* num of VL low priority weights */
+#define HFI1_IB_CFG_OVERRUN_THRESH 13 /* IB overrun threshold */
+#define HFI1_IB_CFG_PHYERR_THRESH 14 /* IB PHY error threshold */
+#define HFI1_IB_CFG_LINKDEFAULT 15 /* IB link default (sleep/poll) */
+#define HFI1_IB_CFG_PKEYS 16 /* update partition keys */
+#define HFI1_IB_CFG_MTU 17 /* update MTU in IBC */
+#define HFI1_IB_CFG_VL_HIGH_LIMIT 19 /* note: value 18 is not defined here */
+#define HFI1_IB_CFG_PMA_TICKS 20 /* PMA sample tick resolution */
+#define HFI1_IB_CFG_PORT 21 /* switch port we are connected to */
+
+/*
+ * HFI or Host Link States
+ *
+ * These describe the states the driver thinks the logical and physical
+ * states are in. Used as an argument to set_link_state(). Implemented
+ * as bits for easy multi-state checking. The actual state can only be
+ * one.
+ */
+#define __HLS_UP_INIT_BP 0 /* the __HLS_*_BP values are bit positions for BIT() */
+#define __HLS_UP_ARMED_BP 1
+#define __HLS_UP_ACTIVE_BP 2
+#define __HLS_DN_DOWNDEF_BP 3 /* link down default */
+#define __HLS_DN_POLL_BP 4
+#define __HLS_DN_DISABLE_BP 5
+#define __HLS_DN_OFFLINE_BP 6
+#define __HLS_VERIFY_CAP_BP 7
+#define __HLS_GOING_UP_BP 8
+#define __HLS_GOING_OFFLINE_BP 9
+#define __HLS_LINK_COOLDOWN_BP 10
+
+#define HLS_UP_INIT BIT(__HLS_UP_INIT_BP)
+#define HLS_UP_ARMED BIT(__HLS_UP_ARMED_BP)
+#define HLS_UP_ACTIVE BIT(__HLS_UP_ACTIVE_BP)
+#define HLS_DN_DOWNDEF BIT(__HLS_DN_DOWNDEF_BP) /* link down default */
+#define HLS_DN_POLL BIT(__HLS_DN_POLL_BP)
+#define HLS_DN_DISABLE BIT(__HLS_DN_DISABLE_BP)
+#define HLS_DN_OFFLINE BIT(__HLS_DN_OFFLINE_BP)
+#define HLS_VERIFY_CAP BIT(__HLS_VERIFY_CAP_BP)
+#define HLS_GOING_UP BIT(__HLS_GOING_UP_BP)
+#define HLS_GOING_OFFLINE BIT(__HLS_GOING_OFFLINE_BP)
+#define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP)
+
+#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) /* any of the three up states */
+#define HLS_DOWN (~(HLS_UP)) /* every bit that is not an HLS_UP_* bit; parenthesized as a complex value */
+
+/* use this MTU size if none other is given */
+#define HFI1_DEFAULT_ACTIVE_MTU 10240
+/* use this MTU size as the default maximum */
+#define HFI1_DEFAULT_MAX_MTU 10240
+/* default partition key */
+#define DEFAULT_PKEY 0xffff
+
+/*
+ * Possible fabric manager config parameters for fm_{get,set}_table()
+ */
+#define FM_TBL_VL_HIGH_ARB 1 /* Get/set VL high prio weights */
+#define FM_TBL_VL_LOW_ARB 2 /* Get/set VL low prio weights */
+#define FM_TBL_BUFFER_CONTROL 3 /* Get/set Buffer Control */
+#define FM_TBL_SC2VLNT 4 /* Get/set SC->VLnt */
+#define FM_TBL_VL_PREEMPT_ELEMS 5 /* Get (no set) VL preempt elems */
+#define FM_TBL_VL_PREEMPT_MATRIX 6 /* Get (no set) VL preempt matrix */
+
+/*
+ * Possible "operations" for f_rcvctrl(ppd, op, ctxt)
+ * these are bits so they can be combined, e.g.
+ * HFI1_RCVCTRL_INTRAVAIL_ENB | HFI1_RCVCTRL_CTXT_ENB
+ */
+#define HFI1_RCVCTRL_TAILUPD_ENB 0x01 /* each feature has an _ENB and a _DIS bit */
+#define HFI1_RCVCTRL_TAILUPD_DIS 0x02
+#define HFI1_RCVCTRL_CTXT_ENB 0x04
+#define HFI1_RCVCTRL_CTXT_DIS 0x08
+#define HFI1_RCVCTRL_INTRAVAIL_ENB 0x10
+#define HFI1_RCVCTRL_INTRAVAIL_DIS 0x20
+#define HFI1_RCVCTRL_PKEY_ENB 0x40 /* Note, default is enabled */
+#define HFI1_RCVCTRL_PKEY_DIS 0x80
+#define HFI1_RCVCTRL_TIDFLOW_ENB 0x0400 /* note: 0x0100 and 0x0200 are not defined here */
+#define HFI1_RCVCTRL_TIDFLOW_DIS 0x0800
+#define HFI1_RCVCTRL_ONE_PKT_EGR_ENB 0x1000
+#define HFI1_RCVCTRL_ONE_PKT_EGR_DIS 0x2000
+#define HFI1_RCVCTRL_NO_RHQ_DROP_ENB 0x4000
+#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000
+#define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000
+#define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000
+
+/* partition enforcement flags */
+#define HFI1_PART_ENFORCE_IN 0x1
+#define HFI1_PART_ENFORCE_OUT 0x2
+
+/* how often we check for synthetic counter wrap around */
+#define SYNTH_CNT_TIME 2 /* NOTE(review): time unit is not stated here */
+
+/* Counter flags */
+#define CNTR_NORMAL 0x0 /* Normal counters, just read register */
+#define CNTR_SYNTH 0x1 /* Synthetic counters, saturate at all 1s */
+#define CNTR_DISABLED 0x2 /* Disable this counter */
+#define CNTR_32BIT 0x4 /* Simulate 64 bits for this counter */
+#define CNTR_VL 0x8 /* Per VL counter */
+#define CNTR_SDMA 0x10
+#define CNTR_INVALID_VL (-1) /* Specifies invalid VL; parenthesized so the sign stays bound */
+#define CNTR_MODE_W 0x0
+#define CNTR_MODE_R 0x1
+
+/* VLs Supported/Operational */
+#define HFI1_MIN_VLS_SUPPORTED 1
+#define HFI1_MAX_VLS_SUPPORTED 8
+
+static inline void incr_cntr64(u64 *cntr)
+{
+	if (*cntr != ~(u64)0) /* saturating increment: stop at all 1s */
+		++*cntr;
+}
+
+static inline void incr_cntr32(u32 *cntr)
+{
+	if (*cntr != (u32)~0U) /* saturating increment: stop at all 1s */
+		++*cntr;
+}
+
+#define MAX_NAME_SIZE 64 /* sizes hfi1_msix_entry.name and hfi1_devdata.intx_name */
+struct hfi1_msix_entry { /* element type of hfi1_devdata.msix_entries */
+ enum irq_type type;
+ struct msix_entry msix;
+ void *arg;
+ char name[MAX_NAME_SIZE];
+ cpumask_t mask;
+};
+
+/* per-SL CCA information; hfi1_pportdata holds OPA_MAX_SLS of these */
+struct cca_timer {
+ struct hrtimer hrtimer;
+ struct hfi1_pportdata *ppd; /* read-only */
+ int sl; /* read-only */
+ u16 ccti; /* read/write - current value of CCTI; see ppd->cca_timer_lock */
+};
+
+struct link_down_reason {
+ /*
+ * SMA-facing value. Should be set from .latest when
+ * HLS_UP_* -> HLS_DN_* transition actually occurs.
+ */
+ u8 sma;
+ u8 latest; /* source value that .sma is set from (see above) */
+};
+
+enum { /* indices into hfi1_pportdata.vl_arb_cache[] */
+ LO_PRIO_TABLE,
+ HI_PRIO_TABLE,
+ MAX_PRIO_TABLE /* number of tables; length of vl_arb_cache[] */
+};
+
+struct vl_arb_cache { /* hfi1_pportdata keeps MAX_PRIO_TABLE of these */
+ /* protect vl arb cache */
+ spinlock_t lock;
+ struct ib_vl_weight_elem table[VL_ARB_TABLE_SIZE];
+};
+
+/*
+ * The structure below encapsulates data relevant to a physical IB Port.
+ * Current chips support only one such port, but the separation
+ * clarifies things a bit. Note that to conform to IB conventions,
+ * port-numbers are one-based. The first or only port is port1.
+ */
+struct hfi1_pportdata {
+ struct hfi1_ibport ibport_data;
+
+ struct hfi1_devdata *dd;
+ struct kobject pport_cc_kobj;
+ struct kobject sc2vl_kobj;
+ struct kobject sl2sc_kobj;
+ struct kobject vl2mtu_kobj;
+
+ /* PHY support */
+ u32 port_type;
+ struct qsfp_data qsfp_info;
+
+ /* GUID for this interface, in host order */
+ u64 guid;
+ /* GUID for peer interface, in host order */
+ u64 neighbor_guid;
+
+ /* up or down physical link state */
+ u32 linkup;
+
+ /*
+ * this address is mapped read-only into user processes so they can
+ * get status cheaply, whenever they want. One qword of status per port
+ */
+ u64 *statusp;
+
+ /* SendDMA related entries */
+
+ struct workqueue_struct *hfi1_wq;
+
+ /* move out of interrupt context */
+ struct work_struct link_vc_work;
+ struct work_struct link_up_work;
+ struct work_struct link_down_work;
+ struct work_struct sma_message_work;
+ struct work_struct freeze_work;
+ struct work_struct link_downgrade_work;
+ struct work_struct link_bounce_work;
+ /* host link state variables */
+ struct mutex hls_lock;
+ u32 host_link_state;
+
+ spinlock_t sdma_alllock ____cacheline_aligned_in_smp;
+
+ u32 lstate; /* logical link state */
+
+ /* these are the "32 bit" regs */
+
+ u32 ibmtu; /* The MTU programmed for this unit */
+ /*
+ * Current max size IB packet (in bytes) including IB headers, that
+ * we can send. Changes when ibmtu changes.
+ */
+ u32 ibmaxlen;
+ u32 current_egress_rate; /* units [10^6 bits/sec] */
+ /* LID programmed for this instance */
+ u16 lid;
+ /* list of pkeys programmed; 0 if not set */
+ u16 pkeys[MAX_PKEY_VALUES];
+ u16 link_width_supported;
+ u16 link_width_downgrade_supported;
+ u16 link_speed_supported;
+ u16 link_width_enabled;
+ u16 link_width_downgrade_enabled;
+ u16 link_speed_enabled;
+ u16 link_width_active;
+ u16 link_width_downgrade_tx_active;
+ u16 link_width_downgrade_rx_active;
+ u16 link_speed_active;
+ u8 vls_supported;
+ u8 vls_operational;
+ u8 actual_vls_operational;
+ /* LID mask control */
+ u8 lmc;
+ /* Rx Polarity inversion (compensate for ~tx on partner) */
+ u8 rx_pol_inv;
+
+ u8 hw_pidx; /* physical port index */
+ u8 port; /* IB port number (one-based); port - 1 indexes dd->pport */
+ /* type of neighbor node */
+ u8 neighbor_type;
+ u8 neighbor_normal;
+ u8 neighbor_fm_security; /* 1 if firmware checking is disabled */
+ u8 neighbor_port_number;
+ u8 is_sm_config_started;
+ u8 offline_disabled_reason;
+ u8 is_active_optimize_enabled;
+ u8 driver_link_ready; /* driver ready for active link */
+ u8 link_enabled; /* link enabled? */
+ u8 linkinit_reason;
+ u8 local_tx_rate; /* rate given to 8051 firmware */
+ u8 last_pstate; /* info only */
+
+ /* placeholders for IB MAD packet settings */
+ u8 overrun_threshold;
+ u8 phy_error_threshold;
+
+ /* Used to override LED behavior for things like maintenance beaconing */
+ /*
+ * Alternates per phase of blink
+ * [0] holds LED off duration, [1] holds LED on duration
+ */
+ unsigned long led_override_vals[2];
+ u8 led_override_phase; /* LSB picks from vals[] */
+ atomic_t led_override_timer_active;
+ /* Used to flash LEDs in override mode */
+ struct timer_list led_override_timer;
+
+ u32 sm_trap_qp;
+ u32 sa_qp;
+
+ /*
+ * cca_timer_lock protects access to the per-SL cca_timer
+ * structures (specifically the ccti member).
+ */
+ spinlock_t cca_timer_lock ____cacheline_aligned_in_smp;
+ struct cca_timer cca_timer[OPA_MAX_SLS];
+
+ /* List of congestion control table entries */
+ struct ib_cc_table_entry_shadow ccti_entries[CC_TABLE_SHADOW_MAX];
+
+ /* congestion entries, each entry corresponding to a SL */
+ struct opa_congestion_setting_entry_shadow
+ congestion_entries[OPA_MAX_SLS];
+
+ /*
+ * cc_state_lock protects (write) access to the per-port
+ * struct cc_state.
+ */
+ spinlock_t cc_state_lock ____cacheline_aligned_in_smp;
+
+ struct cc_state __rcu *cc_state;
+
+ /* Total number of congestion control table entries */
+ u16 total_cct_entry;
+
+ /* Bit map identifying service level */
+ u32 cc_sl_control_map;
+
+ /* CA's max number of 64 entry units in the congestion control table */
+ u8 cc_max_table_entries;
+
+ /*
+ * begin congestion log related entries
+ * cc_log_lock protects all congestion log related data
+ */
+ spinlock_t cc_log_lock ____cacheline_aligned_in_smp;
+ u8 threshold_cong_event_map[OPA_MAX_SLS / 8]; /* OPA_MAX_SLS bits; presumably one per SL */
+ u16 threshold_event_counter;
+ struct opa_hfi1_cong_log_event_internal cc_events[OPA_CONG_LOG_ELEMS];
+ int cc_log_idx; /* index for logging events */
+ int cc_mad_idx; /* index for reporting events */
+ /* end congestion log related entries */
+
+ struct vl_arb_cache vl_arb_cache[MAX_PRIO_TABLE];
+
+ /* port relative counter buffer */
+ u64 *cntrs;
+ /* port relative synthetic counter buffer */
+ u64 *scntrs;
+ /* port_xmit_discards are synthesized from different egress errors */
+ u64 port_xmit_discards;
+ u64 port_xmit_discards_vl[C_VL_COUNT];
+ u64 port_xmit_constraint_errors;
+ u64 port_rcv_constraint_errors;
+ /* count of 'link_err' interrupts from DC */
+ u64 link_downed;
+ /* number of times link retrained successfully */
+ u64 link_up;
+ /* number of times a link unknown frame was reported */
+ u64 unknown_frame_count;
+ /* port_ltp_crc_mode is returned in 'portinfo' MADs */
+ u16 port_ltp_crc_mode;
+ /* port_crc_mode_enabled is the crc we support */
+ u8 port_crc_mode_enabled;
+ /* mgmt_allowed is also returned in 'portinfo' MADs */
+ u8 mgmt_allowed;
+ u8 part_enforce; /* partition enforcement flags */
+ struct link_down_reason local_link_down_reason;
+ struct link_down_reason neigh_link_down_reason;
+ /* Value to be sent to link peer on LinkDown. */
+ u8 remote_link_down_reason;
+ /* Error events that will cause a port bounce. */
+ u32 port_error_action;
+ struct work_struct linkstate_active_work;
+ /* Does this port need to prescan for FECNs */
+ bool cc_prescan;
+};
+
+typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); /* returns an RHF_RCV_* value */
+
+typedef void (*opcode_handler)(struct hfi1_packet *packet);
+
+/* return values for the RHF receive functions */
+#define RHF_RCV_CONTINUE 0 /* keep going */
+#define RHF_RCV_DONE 1 /* stop, this packet processed */
+#define RHF_RCV_REPROCESS 2 /* stop. retain this packet */
+
+struct rcv_array_data { /* type of hfi1_devdata.rcv_entries */
+ u8 group_size;
+ u16 ngroups;
+ u16 nctxt_extra;
+};
+
+struct per_vl_data { /* element type of hfi1_devdata.vld[] */
+ u16 mtu;
+ struct send_context *sc;
+};
+
+/* 16 to directly index */
+#define PER_VL_SEND_CONTEXTS 16 /* also the length of hfi1_devdata.vld[] */
+
+struct err_info_rcvport { /* type of hfi1_devdata.err_info_rcvport */
+ u8 status_and_code;
+ u64 packet_flit1;
+ u64 packet_flit2;
+};
+
+struct err_info_constraint { /* type of hfi1_devdata.err_info_rcv_constraint */
+ u8 status;
+ u16 pkey;
+ u32 slid;
+};
+
+struct hfi1_temp { /* NOTE(review): units of the temperature fields are not stated here */
+ unsigned int curr; /* current temperature */
+ unsigned int lo_lim; /* low temperature limit */
+ unsigned int hi_lim; /* high temperature limit */
+ unsigned int crit_lim; /* critical temperature limit */
+ u8 triggers; /* temperature triggers */
+};
+
+/* common data between shared ASIC HFIs; reached through hfi1_devdata.asic_data */
+struct hfi1_asic_data {
+ struct hfi1_devdata *dds[2]; /* back pointers */
+ struct mutex asic_resource_mutex;
+};
+
+/* device data struct now contains only "general per-device" info.
+ * fields related to a physical IB port are in a hfi1_pportdata struct.
+ */
+struct sdma_engine; /* forward declaration only */
+struct sdma_vl_map; /* forward declaration only */
+
+#define BOARD_VERS_MAX 96 /* how long the version string can be */
+#define SERIAL_MAX 16 /* length of the serial number */
+
+typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64); /* u64 meaning not stated here */
+struct hfi1_devdata {
+ struct hfi1_ibdev verbs_dev; /* must be first */
+ struct list_head list;
+ /* pointers to related structs for this device */
+ /* pci access data structure */
+ struct pci_dev *pcidev;
+ struct cdev user_cdev;
+ struct cdev diag_cdev;
+ struct cdev ui_cdev;
+ struct device *user_device;
+ struct device *diag_device;
+ struct device *ui_device;
+
+ /* mem-mapped pointer to base of chip regs */
+ u8 __iomem *kregbase;
+ /* end of mem-mapped chip space excluding sendbuf and user regs */
+ u8 __iomem *kregend;
+ /* physical address of chip for io_remap, etc. */
+ resource_size_t physaddr;
+ /* receive context data */
+ struct hfi1_ctxtdata **rcd;
+ /* send context data */
+ struct send_context_info *send_contexts;
+ /* map hardware send contexts to software index */
+ u8 *hw_to_sw;
+ /* spinlock for allocating and releasing send context resources */
+ spinlock_t sc_lock;
+ /* Per VL data. Enough for all VLs but not all elements are set/used. */
+ struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
+ /* lock for pio_map */
+ spinlock_t pio_map_lock;
+ /* array of kernel send contexts */
+ struct send_context **kernel_send_context;
+ /* array of vl maps */
+ struct pio_vl_map __rcu *pio_map;
+ /* seqlock for sc2vl */
+ seqlock_t sc2vl_lock;
+ u64 sc2vl[4];
+ /* Send Context initialization lock. */
+ spinlock_t sc_init_lock;
+
+ /* fields common to all SDMA engines */
+
+ /* default flags to last descriptor */
+ u64 default_desc1;
+ volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */
+ dma_addr_t sdma_heads_phys;
+ void *sdma_pad_dma; /* DMA'ed by chip */
+ dma_addr_t sdma_pad_phys;
+ /* for deallocation */
+ size_t sdma_heads_size;
+ /* number from the chip */
+ u32 chip_sdma_engines;
+ /* num used */
+ u32 num_sdma;
+ /* lock for sdma_map */
+ spinlock_t sde_map_lock;
+ /* array of engines sized by num_sdma */
+ struct sdma_engine *per_sdma;
+ /* array of vl maps */
+ struct sdma_vl_map __rcu *sdma_map;
+ /* SPC freeze waitqueue and variable */
+ wait_queue_head_t sdma_unfreeze_wq;
+ atomic_t sdma_unfreeze_count;
+
+ /* common data between shared ASIC HFIs in this OS */
+ struct hfi1_asic_data *asic_data;
+
+ /* hfi1_pportdata, points to array of (physical) port-specific
+ * data structs, indexed by pidx (0..n-1)
+ */
+ struct hfi1_pportdata *pport;
+
+ /* mem-mapped pointer to base of PIO buffers */
+ void __iomem *piobase;
+ /*
+ * write-combining mem-mapped pointer to base of RcvArray
+ * memory.
+ */
+ void __iomem *rcvarray_wc;
+ /*
+ * credit return base - a per-NUMA range of DMA address that
+ * the chip will use to update the per-context free counter
+ */
+ struct credit_return_base *cr_base;
+
+ /* send context numbers and sizes for each type */
+ struct sc_config_sizes sc_sizes[SC_MAX];
+
+ u32 lcb_access_count; /* count of LCB users */
+
+ char *boardname; /* human readable board info */
+
+ /* device (not port) flags, basically device capabilities */
+ u32 flags;
+
+ /* reset value */
+ u64 z_int_counter;
+ u64 z_rcv_limit;
+ u64 z_send_schedule;
+ /* percpu int_counter */
+ u64 __percpu *int_counter;
+ u64 __percpu *rcv_limit;
+ u64 __percpu *send_schedule;
+ /* number of receive contexts in use by the driver */
+ u32 num_rcv_contexts;
+ /* number of pio send contexts in use by the driver */
+ u32 num_send_contexts;
+ /*
+ * number of ctxts available for PSM open
+ */
+ u32 freectxts;
+ /* total number of available user/PSM contexts */
+ u32 num_user_contexts;
+ /* base receive interrupt timeout, in CSR units */
+ u32 rcv_intr_timeout_csr;
+
+ u64 __iomem *egrtidbase;
+ spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
+ spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
+ /* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */
+ spinlock_t uctxt_lock; /* rcd and user context changes */
+ /* exclusive access to 8051 */
+ spinlock_t dc8051_lock;
+ /* exclusive access to 8051 memory */
+ spinlock_t dc8051_memlock;
+ int dc8051_timed_out; /* remember if the 8051 timed out */
+ /*
+ * A page that will hold event notification bitmaps for all
+ * contexts. This page will be mapped into all processes.
+ */
+ unsigned long *events;
+ /*
+ * per unit status, see also portdata statusp
+ * mapped read-only into user processes so they can get unit and
+ * IB link status cheaply
+ */
+ struct hfi1_status *status;
+ u32 freezelen; /* max length of freezemsg */
+
+ /* revision register shadow */
+ u64 revision;
+ /* Base GUID for device (network order) */
+ u64 base_guid;
+
+ /* these are the "32 bit" regs */
+
+ /* value we put in kr_rcvhdrsize */
+ u32 rcvhdrsize;
+ /* number of receive contexts the chip supports */
+ u32 chip_rcv_contexts;
+ /* number of receive array entries */
+ u32 chip_rcv_array_count;
+ /* number of PIO send contexts the chip supports */
+ u32 chip_send_contexts;
+ /* number of bytes in the PIO memory buffer */
+ u32 chip_pio_mem_size;
+ /* number of bytes in the SDMA memory buffer */
+ u32 chip_sdma_mem_size;
+
+ /* size of each rcvegrbuffer */
+ u32 rcvegrbufsize;
+ /* log2 of above */
+ u16 rcvegrbufsize_shift;
+ /* both sides of the PCIe link are gen3 capable */
+ u8 link_gen3_capable;
+ /* localbus width (1, 2,4,8,16,32) from config space */
+ u32 lbus_width;
+ /* localbus speed in MHz */
+ u32 lbus_speed;
+ int unit; /* unit # of this chip */
+ int node; /* home node of this chip */
+
+ /* save these PCI fields to restore after a reset */
+ u32 pcibar0;
+ u32 pcibar1;
+ u32 pci_rom;
+ u16 pci_command;
+ u16 pcie_devctl;
+ u16 pcie_lnkctl;
+ u16 pcie_devctl2;
+ u32 pci_msix0;
+ u32 pci_lnkctl3;
+ u32 pci_tph2;
+
+ /*
+ * ASCII serial number, from flash, large enough for original
+ * all digit strings, and longer serial number format
+ */
+ u8 serial[SERIAL_MAX];
+ /* human readable board version */
+ u8 boardversion[BOARD_VERS_MAX];
+ u8 lbus_info[32]; /* human readable localbus info */
+ /* chip major rev, from CceRevision */
+ u8 majrev;
+ /* chip minor rev, from CceRevision */
+ u8 minrev;
+ /* hardware ID */
+ u8 hfi1_id;
+ /* implementation code */
+ u8 icode;
+ /* default link down value (poll/sleep) */
+ u8 link_default;
+ /* vAU of this device */
+ u8 vau;
+ /* vCU of this device */
+ u8 vcu;
+ /* link credits of this device */
+ u16 link_credits;
+ /* initial vl15 credits to use */
+ u16 vl15_init;
+
+ /* Misc small ints */
+ /* Number of physical ports available */
+ u8 num_pports;
+ /* Lowest context number which can be used by user processes */
+ u8 first_user_ctxt;
+ u8 n_krcv_queues;
+ u8 qos_shift;
+ u8 qpn_mask;
+
+ u16 rhf_offset; /* offset of RHF within receive header entry */
+ u16 irev; /* implementation revision */
+ u16 dc8051_ver; /* 8051 firmware version */
+
+ struct platform_config platform_config;
+ struct platform_config_cache pcfg_cache;
+
+ struct diag_client *diag_client;
+ spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
+
+ u8 psxmitwait_supported;
+ /* cycle length of PS* counters in HW (in picoseconds) */
+ u16 psxmitwait_check_rate;
+ /* high volume overflow errors deferred to tasklet */
+ struct tasklet_struct error_tasklet;
+
+ /* MSI-X information */
+ struct hfi1_msix_entry *msix_entries;
+ u32 num_msix_entries;
+
+ /* INTx information */
+ u32 requested_intx_irq; /* did we request one? */
+ char intx_name[MAX_NAME_SIZE]; /* INTx name */
+
+ /* general interrupt: mask of handled interrupts */
+ u64 gi_mask[CCE_NUM_INT_CSRS];
+
+ struct rcv_array_data rcv_entries;
+
+ /*
+ * 64 bit synthetic counters
+ */
+ struct timer_list synth_stats_timer;
+
+ /*
+ * device counters
+ */
+ char *cntrnames;
+ size_t cntrnameslen;
+ size_t ndevcntrs;
+ u64 *cntrs;
+ u64 *scntrs;
+
+ /*
+ * remembered values for synthetic counters
+ */
+ u64 last_tx;
+ u64 last_rx;
+
+ /*
+ * per-port counters
+ */
+ size_t nportcntrs;
+ char *portcntrnames;
+ size_t portcntrnameslen;
+
+ struct hfi1_snoop_data hfi1_snoop;
+
+ struct err_info_rcvport err_info_rcvport;
+ struct err_info_constraint err_info_rcv_constraint;
+ struct er