diff options
Diffstat (limited to 'drivers/staging/rdma/hfi1/chip.c')
-rw-r--r-- | drivers/staging/rdma/hfi1/chip.c | 2456 |
1 files changed, 1353 insertions, 1103 deletions
diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 46a1830b509b..16eb653903e0 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -1,12 +1,11 @@ /* + * Copyright(c) 2015, 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. - * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,8 +17,6 @@ * * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -64,6 +61,8 @@ #include "sdma.h" #include "eprom.h" #include "efivar.h" +#include "platform.h" +#include "aspm.h" #define NUM_IB_PORTS 1 @@ -420,10 +419,10 @@ static struct flag_table pio_err_status_flags[] = { SEC_SPC_FREEZE, SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK), /*23*/ FLAG_ENTRY("PioWriteQwValidParity", - SEC_WRITE_DROPPED|SEC_SPC_FREEZE, + SEC_WRITE_DROPPED | SEC_SPC_FREEZE, SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK), /*24*/ FLAG_ENTRY("PioBlockQwCountParity", - SEC_WRITE_DROPPED|SEC_SPC_FREEZE, + SEC_WRITE_DROPPED | SEC_SPC_FREEZE, SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK), /*25*/ FLAG_ENTRY("PioVlfVlLenParity", SEC_SPC_FREEZE, @@ -509,6 +508,12 @@ static struct flag_table sdma_err_status_flags[] = { | SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \ | SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK) +/* SendEgressErrInfo bits that correspond to a PortXmitDiscard counter */ +#define PORT_DISCARD_EGRESS_ERRS \ + (SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK \ + | SEND_EGRESS_ERR_INFO_VL_MAPPING_ERR_SMASK \ + | SEND_EGRESS_ERR_INFO_VL_ERR_SMASK) + /* * TXE Egress Error flags */ @@ -936,7 +941,7 @@ static struct flag_table dc8051_err_flags[] = { FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)), FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)), FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES", - D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)), + D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)), FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)), }; @@ -950,7 +955,7 @@ static struct flag_table dc8051_info_err_flags[] = { FLAG_ENTRY0("Unknown frame received", UNKNOWN_FRAME), FLAG_ENTRY0("Target BER not met", TARGET_BER_NOT_MET), FLAG_ENTRY0("Serdes internal loopback failure", - FAILED_SERDES_INTERNAL_LOOPBACK), + FAILED_SERDES_INTERNAL_LOOPBACK), FLAG_ENTRY0("Failed SerDes init", FAILED_SERDES_INIT), FLAG_ENTRY0("Failed LNI(Polling)", FAILED_LNI_POLLING), FLAG_ENTRY0("Failed LNI(Debounce)", FAILED_LNI_DEBOUNCE), @@ -958,7 +963,8 @@ static struct flag_table dc8051_info_err_flags[] = { FLAG_ENTRY0("Failed LNI(OptEq)", FAILED_LNI_OPTEQ), FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1), FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2), - FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT) + FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT), + FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT) }; /* @@ -978,7 +984,6 @@ static struct flag_table dc8051_info_host_msg_flags[] = { FLAG_ENTRY0("Link going down", 0x0100), }; - static u32 encoded_size(u32 size); static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate); static int set_physical_link_state(struct hfi1_devdata *dd, u64 state); @@ -1140,11 +1145,8 @@ struct cntr_entry { /* * accessor for stat element, context either dd or ppd */ - u64 (*rw_cntr)(const struct cntr_entry *, - void *context, - int vl, - int mode, - u64 data); + u64 (*rw_cntr)(const struct cntr_entry *, void *context, int vl, + int mode, u64 data); }; #define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0 @@ -1188,7 +1190,7 @@ CNTR_ELEM(#name, \ #define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx #define OVR_ELM(ctx) \ CNTR_ELEM("RcvHdrOvr" #ctx, \ - (RCV_HDR_OVFL_CNT + ctx*0x100), \ + (RCV_HDR_OVFL_CNT + ctx * 0x100), \ 0, CNTR_NORMAL, port_access_u64_csr) /* 32bit TXE */ @@ -1274,7 +1276,6 @@ static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr, { u64 ret; - if (mode == CNTR_MODE_R) { ret = read_csr(dd, csr); } else if (mode == CNTR_MODE_W) { @@ -1291,17 +1292,65 @@ static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr, /* Dev Access */ static u64 dev_access_u32_csr(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; + u64 csr = entry->csr; - if (vl != CNTR_INVALID_VL) - return 0; - return read_write_csr(dd, entry->csr, mode, data); + if (entry->flags & CNTR_SDMA) { + if (vl == CNTR_INVALID_VL) + return 0; + csr += 0x100 * vl; + } else { + if (vl != CNTR_INVALID_VL) + return 0; + } + return read_write_csr(dd, csr, mode, data); +} + +static u64 access_sde_err_cnt(const struct cntr_entry *entry, + void *context, int idx, int mode, u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + if (dd->per_sdma && idx < dd->num_sdma) + return dd->per_sdma[idx].err_cnt; + return 0; +} + +static u64 access_sde_int_cnt(const struct cntr_entry *entry, + void *context, int idx, int mode, u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + if (dd->per_sdma && idx < dd->num_sdma) + return dd->per_sdma[idx].sdma_int_cnt; + return 0; +} + +static u64 access_sde_idle_int_cnt(const struct cntr_entry *entry, + void *context, int idx, int mode, u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + if (dd->per_sdma && idx < dd->num_sdma) + return dd->per_sdma[idx].idle_int_cnt; + return 0; +} + +static u64 access_sde_progress_int_cnt(const struct cntr_entry *entry, + void *context, int idx, int mode, + u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + if (dd->per_sdma && idx < dd->num_sdma) + return dd->per_sdma[idx].progress_int_cnt; + return 0; } static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context, - int vl, int mode, u64 data) + int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; @@ -1322,7 +1371,7 @@ static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context, } static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context, - int vl, int mode, u64 data) + int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; u32 csr = entry->csr; @@ -1346,7 +1395,7 @@ static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context, /* Port Access */ static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context, - int vl, int mode, u64 data) + int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; @@ -1356,7 +1405,7 @@ static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context, } static u64 port_access_u64_csr(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; u64 val; @@ -1396,7 +1445,7 @@ static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode, } static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context, - int vl, int mode, u64 data) + int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; @@ -1406,7 +1455,7 @@ static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context, } static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context, - int vl, int mode, u64 data) + int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; @@ -1427,18 +1476,25 @@ static u64 access_sw_unknown_frame_cnt(const struct cntr_entry *entry, } static u64 access_sw_xmit_discards(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, u64 data) { - struct hfi1_pportdata *ppd = context; + struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; + u64 zero = 0; + u64 *counter; - if (vl != CNTR_INVALID_VL) - return 0; + if (vl == CNTR_INVALID_VL) + counter = &ppd->port_xmit_discards; + else if (vl >= 0 && vl < C_VL_COUNT) + counter = &ppd->port_xmit_discards_vl[vl]; + else + counter = &zero; - return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data); + return read_write_sw(ppd->dd, counter, mode, data); } static u64 access_xmit_constraint_errs(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, + u64 data) { struct hfi1_pportdata *ppd = context; @@ -1450,7 +1506,7 @@ static u64 access_xmit_constraint_errs(const struct cntr_entry *entry, } static u64 access_rcv_constraint_errs(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, u64 data) { struct hfi1_pportdata *ppd = context; @@ -1475,7 +1531,6 @@ static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val, u64 __percpu *cntr, int vl, int mode, u64 data) { - u64 ret = 0; if (vl != CNTR_INVALID_VL) @@ -1507,7 +1562,7 @@ static u64 access_sw_cpu_intr(const struct cntr_entry *entry, } static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = context; @@ -1523,6 +1578,14 @@ static u64 access_sw_pio_wait(const struct cntr_entry *entry, return dd->verbs_dev.n_piowait; } +static u64 access_sw_pio_drain(const struct cntr_entry *entry, + void *context, int vl, int mode, u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + return dd->verbs_dev.n_piodrain; +} + static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { @@ -1540,11 +1603,12 @@ static u64 access_sw_kmem_wait(const struct cntr_entry *entry, } static u64 access_sw_send_schedule(const struct cntr_entry *entry, - void *context, int vl, int mode, u64 data) + void *context, int vl, int mode, u64 data) { struct hfi1_devdata *dd = (struct hfi1_devdata *)context; - return dd->verbs_dev.n_send_schedule; + return read_write_cpu(dd, &dd->z_send_schedule, dd->send_schedule, vl, + mode, data); } /* Software counters for the error status bits within MISC_ERR_STATUS */ @@ -3882,8 +3946,8 @@ static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry, \ void *context, int vl, int mode, u64 data) \ { \ struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; \ - return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr, \ - ppd->ibport_data.cntr, vl, \ + return read_write_cpu(ppd->dd, &ppd->ibport_data.rvp.z_ ##cntr, \ + ppd->ibport_data.rvp.cntr, vl, \ mode, data); \ } @@ -3900,7 +3964,7 @@ static u64 access_ibp_##cntr(const struct cntr_entry *entry, \ if (vl != CNTR_INVALID_VL) \ return 0; \ \ - return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr, \ + return read_write_sw(ppd->dd, &ppd->ibport_data.rvp.n_ ##cntr, \ mode, data); \ } @@ -4063,10 +4127,28 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_vtx_wait), [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL, access_sw_pio_wait), +[C_SW_PIO_DRAIN] = CNTR_ELEM("PioDrain", 0, 0, CNTR_NORMAL, + access_sw_pio_drain), [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL, access_sw_kmem_wait), [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL, access_sw_send_schedule), +[C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn", + SEND_DMA_DESC_FETCHED_CNT, 0, + CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA, + dev_access_u32_csr), +[C_SDMA_INT_CNT] = CNTR_ELEM("SDMAInt", 0, 0, + CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA, + access_sde_int_cnt), +[C_SDMA_ERR_CNT] = CNTR_ELEM("SDMAErrCt", 0, 0, + CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA, + access_sde_err_cnt), +[C_SDMA_IDLE_INT_CNT] = CNTR_ELEM("SDMAIdInt", 0, 0, + CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA, + access_sde_idle_int_cnt), +[C_SDMA_PROGRESS_INT_CNT] = CNTR_ELEM("SDMAPrIntCn", 0, 0, + CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA, + access_sde_progress_int_cnt), /* MISC_ERR_STATUS */ [C_MISC_PLL_LOCK_FAIL_ERR] = CNTR_ELEM("MISC_PLL_LOCK_FAIL_ERR", 0, 0, CNTR_NORMAL, @@ -4876,28 +4958,28 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = { [C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL), [C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH), [C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT, - CNTR_SYNTH | CNTR_VL), + CNTR_SYNTH | CNTR_VL), [C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT, - CNTR_SYNTH | CNTR_VL), + CNTR_SYNTH | CNTR_VL), [C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT, - CNTR_SYNTH | CNTR_VL), + CNTR_SYNTH | CNTR_VL), [C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL), [C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL), [C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT, - access_sw_link_dn_cnt), + access_sw_link_dn_cnt), [C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT, - access_sw_link_up_cnt), + access_sw_link_up_cnt), [C_SW_UNKNOWN_FRAME] = CNTR_ELEM("UnknownFrame", 0, 0, CNTR_NORMAL, access_sw_unknown_frame_cnt), [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT, - access_sw_xmit_discards), + access_sw_xmit_discards), [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0, - CNTR_SYNTH | CNTR_32BIT | CNTR_VL, - access_sw_xmit_discards), + CNTR_SYNTH | CNTR_32BIT | CNTR_VL, + access_sw_xmit_discards), [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH, - access_xmit_constraint_errs), + access_xmit_constraint_errs), [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH, - access_rcv_constraint_errs), + access_rcv_constraint_errs), [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts), [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends), [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks), @@ -4913,9 +4995,9 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = { [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL, access_sw_cpu_rc_acks), [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL, - access_sw_cpu_rc_qacks), + access_sw_cpu_rc_qacks), [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL, - access_sw_cpu_rc_delayed_comp), + access_sw_cpu_rc_delayed_comp), [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1), [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3), [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5), @@ -5064,7 +5146,7 @@ done: * the buffer. End in '*' if the buffer is too short. */ static char *flag_string(char *buf, int buf_len, u64 flags, - struct flag_table *table, int table_size) + struct flag_table *table, int table_size) { char extra[32]; char *p = buf; @@ -5125,10 +5207,8 @@ static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source) if (source < ARRAY_SIZE(cce_misc_names)) strncpy(buf, cce_misc_names[source], bsize); else - snprintf(buf, - bsize, - "Reserved%u", - source + IS_GENERAL_ERR_START); + snprintf(buf, bsize, "Reserved%u", + source + IS_GENERAL_ERR_START); return buf; } @@ -5167,7 +5247,7 @@ static char *is_various_name(char *buf, size_t bsize, unsigned int source) if (source < ARRAY_SIZE(various_names)) strncpy(buf, various_names[source], bsize); else - snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START); + snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START); return buf; } @@ -5252,51 +5332,56 @@ static char *is_reserved_name(char *buf, size_t bsize, unsigned int source) static char *cce_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags)); + cce_err_status_flags, + ARRAY_SIZE(cce_err_status_flags)); } static char *rxe_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags)); + rxe_err_status_flags, + ARRAY_SIZE(rxe_err_status_flags)); } static char *misc_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, misc_err_status_flags, - ARRAY_SIZE(misc_err_status_flags)); + ARRAY_SIZE(misc_err_status_flags)); } static char *pio_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags)); + pio_err_status_flags, + ARRAY_SIZE(pio_err_status_flags)); } static char *sdma_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - sdma_err_status_flags, - ARRAY_SIZE(sdma_err_status_flags)); + sdma_err_status_flags, + ARRAY_SIZE(sdma_err_status_flags)); } static char *egress_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags)); + egress_err_status_flags, + ARRAY_SIZE(egress_err_status_flags)); } static char *egress_err_info_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags)); + egress_err_info_flags, + ARRAY_SIZE(egress_err_info_flags)); } static char *send_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - send_err_status_flags, - ARRAY_SIZE(send_err_status_flags)); + send_err_status_flags, + ARRAY_SIZE(send_err_status_flags)); } static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg) @@ -5309,7 +5394,7 @@ static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg) * report or record it. */ dd_dev_info(dd, "CCE Error: %s\n", - cce_err_status_string(buf, sizeof(buf), reg)); + cce_err_status_string(buf, sizeof(buf), reg)); if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK) && is_ax(dd) && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) { @@ -5339,14 +5424,14 @@ static void update_rcverr_timer(unsigned long opaque) u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL); if (dd->rcv_ovfl_cnt < cur_ovfl_cnt && - ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) { + ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) { dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__); - set_link_down_reason(ppd, - OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0, - OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN); + set_link_down_reason( + ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0, + OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN); queue_work(ppd->hfi1_wq, &ppd->link_bounce_work); } - dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt; + dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt; mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME); } @@ -5372,7 +5457,7 @@ static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg) int i = 0; dd_dev_info(dd, "Receive Error: %s\n", - rxe_err_status_string(buf, sizeof(buf), reg)); + rxe_err_status_string(buf, sizeof(buf), reg)); if (reg & ALL_RXE_FREEZE_ERR) { int flags = 0; @@ -5399,7 +5484,7 @@ static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) int i = 0; dd_dev_info(dd, "Misc Error: %s", - misc_err_status_string(buf, sizeof(buf), reg)); + misc_err_status_string(buf, sizeof(buf), reg)); for (i = 0; i < NUM_MISC_ERR_STATUS_COUNTERS; i++) { if (reg & (1ull << i)) incr_cntr64(&dd->misc_err_status_cnt[i]); @@ -5412,7 +5497,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg) int i = 0; dd_dev_info(dd, "PIO Error: %s\n", - pio_err_status_string(buf, sizeof(buf), reg)); + pio_err_status_string(buf, sizeof(buf), reg)); if (reg & ALL_PIO_FREEZE_ERR) start_freeze_handling(dd->pport, 0); @@ -5429,7 +5514,7 @@ static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg) int i = 0; dd_dev_info(dd, "SDMA Error: %s\n", - sdma_err_status_string(buf, sizeof(buf), reg)); + sdma_err_status_string(buf, sizeof(buf), reg)); if (reg & ALL_SDMA_FREEZE_ERR) start_freeze_handling(dd->pport, 0); @@ -5440,12 +5525,14 @@ static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg) } } -static void count_port_inactive(struct hfi1_devdata *dd) +static inline void __count_port_discards(struct hfi1_pportdata *ppd) { - struct hfi1_pportdata *ppd = dd->pport; + incr_cntr64(&ppd->port_xmit_discards); +} - if (ppd->port_xmit_discards < ~(u64)0) - ppd->port_xmit_discards++; +static void count_port_inactive(struct hfi1_devdata *dd) +{ + __count_port_discards(dd->pport); } /* @@ -5457,7 +5544,8 @@ static void count_port_inactive(struct hfi1_devdata *dd) * egress error if more than one packet fails the same integrity check * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO. */ -static void handle_send_egress_err_info(struct hfi1_devdata *dd) +static void handle_send_egress_err_info(struct hfi1_devdata *dd, + int vl) { struct hfi1_pportdata *ppd = dd->pport; u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */ @@ -5468,14 +5556,44 @@ static void handle_send_egress_err_info(struct hfi1_devdata *dd) write_csr(dd, SEND_EGRESS_ERR_INFO, info); dd_dev_info(dd, - "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n", - info, egress_err_info_string(buf, sizeof(buf), info), src); + "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n", + info, egress_err_info_string(buf, sizeof(buf), info), src); /* Eventually add other counters for each bit */ + if (info & PORT_DISCARD_EGRESS_ERRS) { + int weight, i; - if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) { - if (ppd->port_xmit_discards < ~(u64)0) - ppd->port_xmit_discards++; + /* + * Count all applicable bits as individual errors and + * attribute them to the packet that triggered this handler. + * This may not be completely accurate due to limitations + * on the available hardware error information. There is + * a single information register and any number of error + * packets may have occurred and contributed to it before + * this routine is called. This means that: + * a) If multiple packets with the same error occur before + * this routine is called, earlier packets are missed. + * There is only a single bit for each error type. + * b) Errors may not be attributed to the correct VL. + * The driver is attributing all bits in the info register + * to the packet that triggered this call, but bits + * could be an accumulation of different packets with + * different VLs. + * c) A single error packet may have multiple counts attached + * to it. There is no way for the driver to know if + * multiple bits set in the info register are due to a + * single packet or multiple packets. The driver assumes + * multiple packets. + */ + weight = hweight64(info & PORT_DISCARD_EGRESS_ERRS); + for (i = 0; i < weight; i++) { + __count_port_discards(ppd); + if (vl >= 0 && vl < TXE_NUM_DATA_VL) + incr_cntr64(&ppd->port_xmit_discards_vl[vl]); + else if (vl == 15) + incr_cntr64(&ppd->port_xmit_discards_vl + [C_VL_15]); + } } } @@ -5493,12 +5611,71 @@ static inline int port_inactive_err(u64 posn) * Input value is a bit position within the SEND_EGRESS_ERR_STATUS * register. Does it represent a 'disallowed packet' error? */ -static inline int disallowed_pkt_err(u64 posn) +static inline int disallowed_pkt_err(int posn) { return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) && posn <= SEES(TX_SDMA15_DISALLOWED_PACKET)); } +/* + * Input value is a bit position of one of the SDMA engine disallowed + * packet errors. Return which engine. Use of this must be guarded by + * disallowed_pkt_err(). + */ +static inline int disallowed_pkt_engine(int posn) +{ + return posn - SEES(TX_SDMA0_DISALLOWED_PACKET); +} + +/* + * Translate an SDMA engine to a VL. Return -1 if the tranlation cannot + * be done. + */ +static int engine_to_vl(struct hfi1_devdata *dd, int engine) +{ + struct sdma_vl_map *m; + int vl; + + /* range check */ + if (engine < 0 || engine >= TXE_NUM_SDMA_ENGINES) + return -1; + + rcu_read_lock(); + m = rcu_dereference(dd->sdma_map); + vl = m->engine_to_vl[engine]; + rcu_read_unlock(); + + return vl; +} + +/* + * Translate the send context (sofware index) into a VL. Return -1 if the + * translation cannot be done. + */ +static int sc_to_vl(struct hfi1_devdata *dd, int sw_index) +{ + struct send_context_info *sci; + struct send_context *sc; + int i; + + sci = &dd->send_contexts[sw_index]; + + /* there is no information for user (PSM) and ack contexts */ + if (sci->type != SC_KERNEL) + return -1; + + sc = sci->sc; + if (!sc) + return -1; + if (dd->vld[15].sc == sc) + return 15; + for (i = 0; i < num_vls; i++) + if (dd->vld[i].sc == sc) + return i; + + return -1; +} + static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg) { u64 reg_copy = reg, handled = 0; @@ -5507,34 +5684,34 @@ static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg) if (reg & ALL_TXE_EGRESS_FREEZE_ERR) start_freeze_handling(dd->pport, 0); - if (is_ax(dd) && (reg & - SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK) - && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) + else if (is_ax(dd) && + (reg & SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK) && + (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) start_freeze_handling(dd->pport, 0); while (reg_copy) { int posn = fls64(reg_copy); - /* - * fls64() returns a 1-based offset, but we generally - * want 0-based offsets. - */ + /* fls64() returns a 1-based offset, we want it zero based */ int shift = posn - 1; + u64 mask = 1ULL << shift; if (port_inactive_err(shift)) { count_port_inactive(dd); - handled |= (1ULL << shift); + handled |= mask; } else if (disallowed_pkt_err(shift)) { - handle_send_egress_err_info(dd); - handled |= (1ULL << shift); + int vl = engine_to_vl(dd, disallowed_pkt_engine(shift)); + + handle_send_egress_err_info(dd, vl); + handled |= mask; } - clear_bit(shift, (unsigned long *)®_copy); + reg_copy &= ~mask; } reg &= ~handled; if (reg) dd_dev_info(dd, "Egress Error: %s\n", - egress_err_status_string(buf, sizeof(buf), reg)); + egress_err_status_string(buf, sizeof(buf), reg)); for (i = 0; i < NUM_SEND_EGRESS_ERR_STATUS_COUNTERS; i++) { if (reg & (1ull << i)) @@ -5548,7 +5725,7 @@ static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg) int i = 0; dd_dev_info(dd, "Send Error: %s\n", - send_err_status_string(buf, sizeof(buf), reg)); + send_err_status_string(buf, sizeof(buf), reg)); for (i = 0; i < NUM_SEND_ERR_STATUS_COUNTERS; i++) { if (reg & (1ull << i)) @@ -5594,7 +5771,7 @@ static void interrupt_clear_down(struct hfi1_devdata *dd, u64 mask; dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n", - eri->desc, reg); + eri->desc, reg); /* * Read-modify-write so any other masked bits * remain masked. @@ -5618,14 +5795,15 @@ static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source) interrupt_clear_down(dd, 0, eri); } else { dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n", - source); + source); } } static char *send_context_err_status_string(char *buf, int buf_len, u64 flags) { return flag_string(buf, buf_len, flags, - sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags)); + sc_err_status_flags, + ARRAY_SIZE(sc_err_status_flags)); } /* @@ -5650,15 +5828,15 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, sw_index = dd->hw_to_sw[hw_context]; if (sw_index >= dd->num_send_contexts) { dd_dev_err(dd, - "out of range sw index %u for send context %u\n", - sw_index, hw_context); + "out of range sw index %u for send context %u\n", + sw_index, hw_context); return; } sci = &dd->send_contexts[sw_index]; sc = sci->sc; if (!sc) { dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__, - sw_index, hw_context); + sw_index, hw_context); return; } @@ -5668,10 +5846,11 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS); dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context, - send_context_err_status_string(flags, sizeof(flags), status)); + send_context_err_status_string(flags, sizeof(flags), + status)); if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK) - handle_send_egress_err_info(dd); + handle_send_egress_err_info(dd, sc_to_vl(dd, sw_index)); /* * Automatically restart halted kernel contexts out of interrupt @@ -5704,6 +5883,7 @@ static void handle_sdma_eng_err(struct hfi1_devdata *dd, dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n", sde->this_idx, source, (unsigned long long)status); #endif + sde->err_cnt++; sdma_engine_error(sde, status); /* @@ -5752,23 +5932,22 @@ static void is_various_int(struct hfi1_devdata *dd, unsigned int source) interrupt_clear_down(dd, 0, eri); else dd_dev_info(dd, - "%s: Unimplemented/reserved interrupt %d\n", - __func__, source); + "%s: Unimplemented/reserved interrupt %d\n", + __func__, source); } static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) { - /* source is always zero */ + /* src_ctx is always zero */ struct hfi1_pportdata *ppd = dd->pport; unsigned long flags; u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N); if (reg & QSFP_HFI0_MODPRST_N) { - - dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n", - __func__); - if (!qsfp_mod_present(ppd)) { + dd_dev_info(dd, "%s: QSFP module removed\n", + __func__); + ppd->driver_link_ready = 0; /* * Cable removed, reset all our information about the @@ -5781,14 +5960,23 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) * an interrupt when a cable is inserted */ ppd->qsfp_info.cache_valid = 0; - ppd->qsfp_info.qsfp_interrupt_functional = 0; + ppd->qsfp_info.reset_needed = 0; + ppd->qsfp_info.limiting_active = 0; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, - flags); - write_csr(dd, - dd->hfi1_id ? - ASIC_QSFP2_INVERT : - ASIC_QSFP1_INVERT, - qsfp_int_mgmt); + flags); + /* Invert the ModPresent pin now to detect plug-in */ + write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT : + ASIC_QSFP1_INVERT, qsfp_int_mgmt); + + if ((ppd->offline_disabled_reason > + HFI1_ODR_MASK( + OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED)) || + (ppd->offline_disabled_reason == + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))) + ppd->offline_disabled_reason = + HFI1_ODR_MASK( + OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED); + if (ppd->host_link_state == HLS_DN_POLL) { /* * The link is still in POLL. This means @@ -5799,28 +5987,33 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) queue_work(ppd->hfi1_wq, &ppd->link_down_work); } } else { + dd_dev_info(dd, "%s: QSFP module inserted\n", + __func__); + spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.cache_valid = 0; ppd->qsfp_info.cache_refresh_required = 1; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, - flags); + flags); + /* + * Stop inversion of ModPresent pin to detect + * removal of the cable + */ qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N; - write_csr(dd, - dd->hfi1_id ? - ASIC_QSFP2_INVERT : - ASIC_QSFP1_INVERT, - qsfp_int_mgmt); + write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT : + ASIC_QSFP1_INVERT, qsfp_int_mgmt); + + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT); } } if (reg & QSFP_HFI0_INT_N) { - - dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n", - __func__); + dd_dev_info(dd, "%s: Interrupt received from QSFP module\n", + __func__); spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); ppd->qsfp_info.check_interrupt_flags = 1; - ppd->qsfp_info.qsfp_interrupt_functional = 1; spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags); } @@ -5834,11 +6027,11 @@ static int request_host_lcb_access(struct hfi1_devdata *dd) int ret; ret = do_8051_command(dd, HCMD_MISC, - (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT, - NULL); + (u64)HCMD_MISC_REQUEST_LCB_ACCESS << + LOAD_DATA_FIELD_ID_SHIFT, NULL); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "%s: command failed with error %d\n", - __func__, ret); + __func__, ret); } return ret == HCMD_SUCCESS ? 0 : -EBUSY; } @@ -5848,11 +6041,11 @@ static int request_8051_lcb_access(struct hfi1_devdata *dd) int ret; ret = do_8051_command(dd, HCMD_MISC, - (u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT, - NULL); + (u64)HCMD_MISC_GRANT_LCB_ACCESS << + LOAD_DATA_FIELD_ID_SHIFT, NULL); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "%s: command failed with error %d\n", - __func__, ret); + __func__, ret); } return ret == HCMD_SUCCESS ? 0 : -EBUSY; } @@ -5864,8 +6057,8 @@ static int request_8051_lcb_access(struct hfi1_devdata *dd) static inline void set_host_lcb_access(struct hfi1_devdata *dd) { write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL, - DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK - | DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK); + DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK | + DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK); } /* @@ -5875,7 +6068,7 @@ static inline void set_host_lcb_access(struct hfi1_devdata *dd) static inline void set_8051_lcb_access(struct hfi1_devdata *dd) { write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL, - DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK); + DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK); } /* @@ -5909,7 +6102,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) /* this access is valid only when the link is up */ if ((ppd->host_link_state & HLS_UP) == 0) { dd_dev_info(dd, "%s: link state %s not up\n", - __func__, link_state_name(ppd->host_link_state)); + __func__, link_state_name(ppd->host_link_state)); ret = -EBUSY; goto done; } @@ -5918,8 +6111,8 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) ret = request_host_lcb_access(dd); if (ret) { dd_dev_err(dd, - "%s: unable to acquire LCB access, err %d\n", - __func__, |