From 359e10f087dbb7b9c9f3035a8cc4391af45bd651 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:47 -0700 Subject: scsi: lpfc: Fix pt2pt discovery on SLI3 HBAs After exchanging PLOGI on an SLI-3 adapter, the PRLI exchange failed. Link trace showed the port was assigned a non-zero n_port_id, but didn't use the address on the PRLI. The assigned address is set on the port by the CONFIG_LINK mailbox command. The driver responded to the PRLI before the mailbox command completed. Thus the PRLI response used the old n_port_id. Defer the PRLI response until CONFIG_LINK completes. Link: https://lore.kernel.org/r/20190922035906.10977-2-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nportdisc.c | 141 ++++++++++++++++++++++++++++++------- 1 file changed, 115 insertions(+), 26 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index f4b879d25fe9..4e96d28cba39 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -279,6 +279,55 @@ lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) lpfc_cancel_retry_delay_tmo(phba->pport, ndlp); } +/* lpfc_defer_pt2pt_acc - Complete SLI3 pt2pt processing on link up + * @phba: pointer to lpfc hba data structure. + * @link_mbox: pointer to CONFIG_LINK mailbox object + * + * This routine is only called if we are SLI3, direct connect pt2pt + * mode and the remote NPort issues the PLOGI after link up. + */ +void +lpfc_defer_pt2pt_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *link_mbox) +{ + LPFC_MBOXQ_t *login_mbox; + MAILBOX_t *mb = &link_mbox->u.mb; + struct lpfc_iocbq *save_iocb; + struct lpfc_nodelist *ndlp; + int rc; + + ndlp = link_mbox->ctx_ndlp; + login_mbox = link_mbox->context3; + save_iocb = login_mbox->context3; + link_mbox->context3 = NULL; + login_mbox->context3 = NULL; + + /* Check for CONFIG_LINK error */ + if (mb->mbxStatus) { + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "4575 CONFIG_LINK fails pt2pt discovery: %x\n", + mb->mbxStatus); + mempool_free(login_mbox, phba->mbox_mem_pool); + mempool_free(link_mbox, phba->mbox_mem_pool); + lpfc_sli_release_iocbq(phba, save_iocb); + return; + } + + /* Now that CONFIG_LINK completed, and our SID is configured, + * we can now proceed with sending the PLOGI ACC. + */ + rc = lpfc_els_rsp_acc(link_mbox->vport, ELS_CMD_PLOGI, + save_iocb, ndlp, login_mbox); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "4576 PLOGI ACC fails pt2pt discovery: %x\n", + rc); + mempool_free(login_mbox, phba->mbox_mem_pool); + } + + mempool_free(link_mbox, phba->mbox_mem_pool); + lpfc_sli_release_iocbq(phba, save_iocb); +} + static int lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_iocbq *cmdiocb) @@ -291,10 +340,12 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, IOCB_t *icmd; struct serv_parm *sp; uint32_t ed_tov; - LPFC_MBOXQ_t *mbox; + LPFC_MBOXQ_t *link_mbox; + LPFC_MBOXQ_t *login_mbox; + struct lpfc_iocbq *save_iocb; struct ls_rjt stat; uint32_t vid, flag; - int rc; + int rc, defer_acc; memset(&stat, 0, sizeof (struct ls_rjt)); pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; @@ -343,6 +394,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, else ndlp->nlp_fcp_info |= CLASS3; + defer_acc = 0; ndlp->nlp_class_sup = 0; if (sp->cls1.classValid) ndlp->nlp_class_sup |= FC_COS_CLASS1; @@ -354,7 +406,6 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_class_sup |= FC_COS_CLASS4; ndlp->nlp_maxframe = ((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) | sp->cmn.bbRcvSizeLsb; - /* if already logged in, do implicit logout */ switch (ndlp->nlp_state) { case NLP_STE_NPR_NODE: @@ -396,6 +447,10 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; ndlp->nlp_flag &= ~NLP_FIRSTBURST; + login_mbox = NULL; + link_mbox = NULL; + save_iocb = NULL; + /* Check for Nport to NPort pt2pt protocol */ if ((vport->fc_flag & FC_PT2PT) && !(vport->fc_flag & FC_PT2PT_PLOGI)) { @@ -423,17 +478,22 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, if (phba->sli_rev == LPFC_SLI_REV4) lpfc_issue_reg_vfi(vport); else { - mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (mbox == NULL) + defer_acc = 1; + link_mbox = mempool_alloc(phba->mbox_mem_pool, + GFP_KERNEL); + if (!link_mbox) goto out; - lpfc_config_link(phba, mbox); - mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - mbox->vport = vport; - rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); - if (rc == MBX_NOT_FINISHED) { - mempool_free(mbox, phba->mbox_mem_pool); + lpfc_config_link(phba, link_mbox); + link_mbox->mbox_cmpl = lpfc_defer_pt2pt_acc; + link_mbox->vport = vport; + link_mbox->ctx_ndlp = ndlp; + + save_iocb = lpfc_sli_get_iocbq(phba); + if (!save_iocb) goto out; - } + /* Save info from cmd IOCB used in rsp */ + memcpy((uint8_t *)save_iocb, (uint8_t *)cmdiocb, + sizeof(struct lpfc_iocbq)); } lpfc_can_disctmo(vport); @@ -448,8 +508,8 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_flag |= NLP_SUPPRESS_RSP; } - mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!mbox) + login_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!login_mbox) goto out; /* Registering an existing RPI behaves differently for SLI3 vs SLI4 */ @@ -457,21 +517,19 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_unreg_rpi(vport, ndlp); rc = lpfc_reg_rpi(phba, vport->vpi, icmd->un.rcvels.remoteID, - (uint8_t *) sp, mbox, ndlp->nlp_rpi); - if (rc) { - mempool_free(mbox, phba->mbox_mem_pool); + (uint8_t *)sp, login_mbox, ndlp->nlp_rpi); + if (rc) goto out; - } /* ACC PLOGI rsp command needs to execute first, - * queue this mbox command to be processed later. + * queue this login_mbox command to be processed later. */ - mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; + login_mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; /* - * mbox->ctx_ndlp = lpfc_nlp_get(ndlp) deferred until mailbox + * login_mbox->ctx_ndlp = lpfc_nlp_get(ndlp) deferred until mailbox * command issued in lpfc_cmpl_els_acc(). */ - mbox->vport = vport; + login_mbox->vport = vport; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI); spin_unlock_irq(shost->host_lock); @@ -504,16 +562,47 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, stat.un.b.lsRjtRsnCode = LSRJT_INVALID_CMD; stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; rc = lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, - ndlp, mbox); + ndlp, login_mbox); if (rc) - mempool_free(mbox, phba->mbox_mem_pool); + mempool_free(login_mbox, phba->mbox_mem_pool); return 1; } - rc = lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox); + if (defer_acc) { + /* So the order here should be: + * Issue CONFIG_LINK mbox + * CONFIG_LINK cmpl + * Issue PLOGI ACC + * PLOGI ACC cmpl + * Issue REG_LOGIN mbox + */ + + /* Save the REG_LOGIN mbox for and rcv IOCB copy later */ + link_mbox->context3 = login_mbox; + login_mbox->context3 = save_iocb; + + /* Start the ball rolling by issuing CONFIG_LINK here */ + rc = lpfc_sli_issue_mbox(phba, link_mbox, MBX_NOWAIT); + if (rc == MBX_NOT_FINISHED) + goto out; + return 1; + } + + rc = lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, login_mbox); if (rc) - mempool_free(mbox, phba->mbox_mem_pool); + mempool_free(login_mbox, phba->mbox_mem_pool); return 1; out: + if (defer_acc) + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "4577 pt2pt discovery failure: %p %p %p\n", + save_iocb, link_mbox, login_mbox); + if (save_iocb) + lpfc_sli_release_iocbq(phba, save_iocb); + if (link_mbox) + mempool_free(link_mbox, phba->mbox_mem_pool); + if (login_mbox) + mempool_free(login_mbox, phba->mbox_mem_pool); + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_OUT_OF_RESOURCE; lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); -- cgit v1.2.3 From 65a3df63e7ff5addafc75ad8bc5ef5856db55429 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:48 -0700 Subject: scsi: lpfc: Fix premature re-enabling of interrupts in lpfc_sli_host_down Use of spin_lock_irq may re-enable interrupts prematurely. Convert to spin_lock. Note: code is under the phba->hba_lock which has been locked with irqsave. Link: https://lore.kernel.org/r/20190922035906.10977-3-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index a0c6945b8139..3b3ae2182e59 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -10678,14 +10678,14 @@ lpfc_sli_host_down(struct lpfc_vport *vport) set_bit(LPFC_DATA_READY, &phba->data_flags); } prev_pring_flag = pring->flag; - spin_lock_irq(&pring->ring_lock); + spin_lock(&pring->ring_lock); list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { if (iocb->vport != vport) continue; list_move_tail(&iocb->list, &completions); } - spin_unlock_irq(&pring->ring_lock); + spin_unlock(&pring->ring_lock); list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) { if (iocb->vport != vport) -- cgit v1.2.3 From b7b95fb8637d7bd271df25e17e002a584b16f411 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:49 -0700 Subject: scsi: lpfc: Fix miss of register read failure check Coverity flagged missing status check on register read that flags a poisoned data return value. Add checking of register read status. Link: https://lore.kernel.org/r/20190922035906.10977-4-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 25aa7a53d255..41cc91d3c77e 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1475,8 +1475,9 @@ lpfc_sli4_pdev_status_reg_wait(struct lpfc_hba *phba) int i; msleep(100); - lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr, - &portstat_reg.word0); + if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr, + &portstat_reg.word0)) + return -EIO; /* verify if privileged for the request operation */ if (!bf_get(lpfc_sliport_status_rn, &portstat_reg) && @@ -1486,8 +1487,9 @@ lpfc_sli4_pdev_status_reg_wait(struct lpfc_hba *phba) /* wait for the SLI port firmware ready after firmware reset */ for (i = 0; i < LPFC_FW_RESET_MAXIMUM_WAIT_10MS_CNT; i++) { msleep(10); - lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr, - &portstat_reg.word0); + if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr, + &portstat_reg.word0)) + continue; if (!bf_get(lpfc_sliport_status_err, &portstat_reg)) continue; if (!bf_get(lpfc_sliport_status_rn, &portstat_reg)) -- cgit v1.2.3 From a5f7337f5a82fc4b13b4481a7e56977656cbe7d1 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:50 -0700 Subject: scsi: lpfc: Fix NVME io abort failures causing hangs The nvme-fc transport may call to abort an io on controller reset. If the driver is out of resources to issue an abort command, it just gives up and does nothing. The transport expects the lldd to always be able to terminate an io it has issued. At that point, the controller hangs waiting for aborted ios to be returned. Note: flaged by "6136" and "6176" error messages. Root issue was the adapter mis-allocated the number resources it allocated for command entries for the adapter. Convert the driver to allocate command resources based on the number of xris supported by the FC port - 1 resource for the original command and 1 resource for the abort request. Link: https://lore.kernel.org/r/20190922035906.10977-5-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 - drivers/scsi/lpfc/lpfc_attr.c | 5 ----- drivers/scsi/lpfc/lpfc_init.c | 9 +++------ drivers/scsi/lpfc/lpfc_sli.c | 17 +++++++++++------ 4 files changed, 14 insertions(+), 18 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 691acbdcc46d..291335e16806 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -872,7 +872,6 @@ struct lpfc_hba { uint32_t cfg_aer_support; uint32_t cfg_sriov_nr_virtfn; uint32_t cfg_request_firmware_upgrade; - uint32_t cfg_iocb_cnt; uint32_t cfg_suppress_link_up; uint32_t cfg_rrq_xri_bitmap_sz; uint32_t cfg_delay_discovery; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 41cc91d3c77e..79a192479755 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3582,9 +3582,6 @@ lpfc_txcmplq_hw_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(txcmplq_hw, S_IRUGO, lpfc_txcmplq_hw_show, NULL); -LPFC_ATTR_R(iocb_cnt, 2, 1, 5, - "Number of IOCBs alloc for ELS, CT, and ABTS: 1k to 5k IOCBs"); - /* # lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear # until the timer expires. Value range is [0,255]. Default value is 30. @@ -6073,7 +6070,6 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_sriov_nr_virtfn, &dev_attr_lpfc_req_fw_upgrade, &dev_attr_lpfc_suppress_link_up, - &dev_attr_lpfc_iocb_cnt, &dev_attr_iocb_hw, &dev_attr_txq_hw, &dev_attr_txcmplq_hw, @@ -7212,7 +7208,6 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_sriov_nr_virtfn_init(phba, lpfc_sriov_nr_virtfn); lpfc_request_firmware_upgrade_init(phba, lpfc_req_fw_upgrade); lpfc_suppress_link_up_init(phba, lpfc_suppress_link_up); - lpfc_iocb_cnt_init(phba, lpfc_iocb_cnt); lpfc_delay_discovery_init(phba, lpfc_delay_discovery); lpfc_sli_mode_init(phba, lpfc_sli_mode); phba->cfg_enable_dss = 1; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index e91377a4cafe..bb84d2a20e76 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -7126,7 +7126,7 @@ lpfc_init_iocb_list(struct lpfc_hba *phba, int iocb_count) if (iocbq_entry == NULL) { printk(KERN_ERR "%s: only allocated %d iocbs of " "expected %d count. Unloading driver.\n", - __func__, i, LPFC_IOCB_LIST_CNT); + __func__, i, iocb_count); goto out_free_iocbq; } @@ -11591,13 +11591,10 @@ fcponly: } /* If the NVME FC4 type is enabled, scale the sg_seg_cnt to - * accommodate 512K and 1M IOs in a single nvme buf and supply - * enough NVME LS iocb buffers for larger connectivity counts. + * accommodate 512K and 1M IOs in a single nvme buf. */ - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) phba->cfg_sg_seg_cnt = LPFC_MAX_NVME_SEG_CNT; - phba->cfg_iocb_cnt = 5; - } /* Only embed PBDE for if_type 6, PBDE support requires xib be set */ if ((bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 3b3ae2182e59..827406f58b1e 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -7523,9 +7523,11 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) } phba->sli4_hba.nvmet_xri_cnt = rc; - cnt = phba->cfg_iocb_cnt * 1024; - /* We need 1 iocbq for every SGL, for IO processing */ - cnt += phba->sli4_hba.nvmet_xri_cnt; + /* We allocate an iocbq for every receive context SGL. + * The additional allocation is for abort and ls handling. + */ + cnt = phba->sli4_hba.nvmet_xri_cnt + + phba->sli4_hba.max_cfg_param.max_xri; } else { /* update host common xri-sgl sizes and mappings */ rc = lpfc_sli4_io_sgl_update(phba); @@ -7547,14 +7549,17 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) rc = -ENODEV; goto out_destroy_queue; } - cnt = phba->cfg_iocb_cnt * 1024; + /* Each lpfc_io_buf job structure has an iocbq element. + * This cnt provides for abort, els, ct and ls requests. + */ + cnt = phba->sli4_hba.max_cfg_param.max_xri; } if (!phba->sli.iocbq_lookup) { /* Initialize and populate the iocb list per host */ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2821 initialize iocb list %d total %d\n", - phba->cfg_iocb_cnt, cnt); + "2821 initialize iocb list with %d entries\n", + cnt); rc = lpfc_init_iocb_list(phba, cnt); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, -- cgit v1.2.3 From 97acd0019d5dadd9c0e111c2083c889bfe548f25 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:51 -0700 Subject: scsi: lpfc: Fix rpi release when deleting vport A prior use-after-free mailbox fix solved it's problem by null'ing a ndlp pointer. However, further testing has shown that this change causes a later state change to occasionally be skipped, which results in a reference count never being decremented thus the rpi is never released, which causes a vport delete to never succeed. Revise the fix in the prior patch to no longer null the ndlp. Instead the RELEASE_RPI flag is set which will drive the release of the rpi. Given the new code was added at a deep indentation level, refactor the code block using a new routine that avoids the indentation issues. Fixes: 9b1640686470 ("scsi: lpfc: Fix use-after-free mailbox cmd completion") Link: https://lore.kernel.org/r/20190922035906.10977-6-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 88 +++++++++++++++++++++++++++------------- drivers/scsi/lpfc/lpfc_sli.c | 2 + 2 files changed, 61 insertions(+), 29 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 749286acdc17..9df6f0cabab0 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4840,6 +4840,44 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } } +/* + * Sets the mailbox completion handler to be used for the + * unreg_rpi command. The handler varies based on the state of + * the port and what will be happening to the rpi next. + */ +static void +lpfc_set_unreg_login_mbx_cmpl(struct lpfc_hba *phba, struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, LPFC_MBOXQ_t *mbox) +{ + unsigned long iflags; + + if (ndlp->nlp_flag & NLP_ISSUE_LOGO) { + mbox->ctx_ndlp = ndlp; + mbox->mbox_cmpl = lpfc_nlp_logo_unreg; + + } else if (phba->sli_rev == LPFC_SLI_REV4 && + (!(vport->load_flag & FC_UNLOADING)) && + (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >= + LPFC_SLI_INTF_IF_TYPE_2) && + (kref_read(&ndlp->kref) > 0)) { + mbox->ctx_ndlp = lpfc_nlp_get(ndlp); + mbox->mbox_cmpl = lpfc_sli4_unreg_rpi_cmpl_clr; + } else { + if (vport->load_flag & FC_UNLOADING) { + if (phba->sli_rev == LPFC_SLI_REV4) { + spin_lock_irqsave(&vport->phba->ndlp_lock, + iflags); + ndlp->nlp_flag |= NLP_RELEASE_RPI; + spin_unlock_irqrestore(&vport->phba->ndlp_lock, + iflags); + } + lpfc_nlp_get(ndlp); + } + mbox->ctx_ndlp = ndlp; + mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + } +} + /* * Free rpi associated with LPFC_NODELIST entry. * This routine is called from lpfc_freenode(), when we are removing @@ -4890,33 +4928,12 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) lpfc_unreg_login(phba, vport->vpi, rpi, mbox); mbox->vport = vport; - if (ndlp->nlp_flag & NLP_ISSUE_LOGO) { - mbox->ctx_ndlp = ndlp; - mbox->mbox_cmpl = lpfc_nlp_logo_unreg; - } else { - if (phba->sli_rev == LPFC_SLI_REV4 && - (!(vport->load_flag & FC_UNLOADING)) && - (bf_get(lpfc_sli_intf_if_type, - &phba->sli4_hba.sli_intf) >= - LPFC_SLI_INTF_IF_TYPE_2) && - (kref_read(&ndlp->kref) > 0)) { - mbox->ctx_ndlp = lpfc_nlp_get(ndlp); - mbox->mbox_cmpl = - lpfc_sli4_unreg_rpi_cmpl_clr; - /* - * accept PLOGIs after unreg_rpi_cmpl - */ - acc_plogi = 0; - } else if (vport->load_flag & FC_UNLOADING) { - mbox->ctx_ndlp = NULL; - mbox->mbox_cmpl = - lpfc_sli_def_mbox_cmpl; - } else { - mbox->ctx_ndlp = ndlp; - mbox->mbox_cmpl = - lpfc_sli_def_mbox_cmpl; - } - } + lpfc_set_unreg_login_mbx_cmpl(phba, vport, ndlp, mbox); + if (mbox->mbox_cmpl == lpfc_sli4_unreg_rpi_cmpl_clr) + /* + * accept PLOGIs after unreg_rpi_cmpl + */ + acc_plogi = 0; if (((ndlp->nlp_DID & Fabric_DID_MASK) != Fabric_DID_MASK) && (!(vport->fc_flag & FC_OFFLINE_MODE))) @@ -5057,6 +5074,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) struct lpfc_hba *phba = vport->phba; LPFC_MBOXQ_t *mb, *nextmb; struct lpfc_dmabuf *mp; + unsigned long iflags; /* Cleanup node for NPort */ lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, @@ -5138,8 +5156,20 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) lpfc_cleanup_vports_rrqs(vport, ndlp); if (phba->sli_rev == LPFC_SLI_REV4) ndlp->nlp_flag |= NLP_RELEASE_RPI; - lpfc_unreg_rpi(vport, ndlp); - + if (!lpfc_unreg_rpi(vport, ndlp)) { + /* Clean up unregistered and non freed rpis */ + if ((ndlp->nlp_flag & NLP_RELEASE_RPI) && + !(ndlp->nlp_rpi == LPFC_RPI_ALLOC_ERROR)) { + lpfc_sli4_free_rpi(vport->phba, + ndlp->nlp_rpi); + spin_lock_irqsave(&vport->phba->ndlp_lock, + iflags); + ndlp->nlp_flag &= ~NLP_RELEASE_RPI; + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; + spin_unlock_irqrestore(&vport->phba->ndlp_lock, + iflags); + } + } return 0; } diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 827406f58b1e..f764012ba0a6 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2526,6 +2526,8 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } else { __lpfc_sli_rpi_release(vport, ndlp); } + if (vport->load_flag & FC_UNLOADING) + lpfc_nlp_put(ndlp); pmb->ctx_ndlp = NULL; } } -- cgit v1.2.3 From 0f154226d699fefe651ccc4db773efc05a820b56 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:52 -0700 Subject: scsi: lpfc: Fix device recovery errors after PLOGI failures When target-side fault injections are made, the driver isn't reconnecting to the remote port. The driver is logging "2753" error messages which state: "PLOGI failure DID:1B2400 Status:x3/xf0240008" The failures status is indicating a Illegal field error, which points to the Temporary RPI field being used for the ELS. This error typically means the driver used an RPI that was already registered (shouldn't be registered if using it in this context). Study has found that if the driver were in discovery attempts and encountered an error, it wouldn't flag the temporary rpi in error. Yet the rpi was released for reallocation in these error paths and another ELS could allocate the rpi. In the failure situation a retry was done on an ELS that had encountered an error, and as the rpi wasn't marked in error, the ELS reused the rpi it originally allocated. But that rpi had been allocated by a different ELS issued after the original error and before the retry attempt. The different ELS had succeeded and the RPI was registered. Fix by marking the rpi state for the node to be in error, aka as needing reallocation, upon an error in the els processing. Error state marking is always done prior to release back to the internal rpi free list, which the driver wasn't doing in cases prior. Also enhanced some of the logging to help in the next case of problem troubleshooting. Link: https://lore.kernel.org/r/20190922035906.10977-7-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 44 ++++++++++++++++++++++++---------------- drivers/scsi/lpfc/lpfc_init.c | 40 ++++++++++++++++++++---------------- drivers/scsi/lpfc/lpfc_sli.c | 8 +++++--- 3 files changed, 55 insertions(+), 37 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 9df6f0cabab0..144786947b63 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4046,7 +4046,7 @@ out: ndlp->nlp_flag |= NLP_RPI_REGISTERED; ndlp->nlp_type |= NLP_FABRIC; lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); - lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE | LOG_DISCOVERY, "0003 rpi:%x DID:%x flg:%x %d map%x x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, kref_read(&ndlp->kref), @@ -4575,8 +4575,10 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, return ndlp; free_rpi: - if (phba->sli_rev == LPFC_SLI_REV4) + if (phba->sli_rev == LPFC_SLI_REV4) { lpfc_sli4_free_rpi(vport->phba, rpi); + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; + } return NULL; } @@ -4835,6 +4837,7 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if (ndlp->nlp_flag & NLP_RELEASE_RPI) { lpfc_sli4_free_rpi(vport->phba, ndlp->nlp_rpi); ndlp->nlp_flag &= ~NLP_RELEASE_RPI; + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; } ndlp->nlp_flag &= ~NLP_UNREG_INP; } @@ -4898,7 +4901,8 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if (ndlp->nlp_flag & NLP_RPI_REGISTERED || ndlp->nlp_flag & NLP_REG_LOGIN_SEND) { if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND) - lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + lpfc_printf_vlog(vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, "3366 RPI x%x needs to be " "unregistered nlp_flag x%x " "did x%x\n", @@ -4909,7 +4913,8 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * no need to queue up another one. */ if (ndlp->nlp_flag & NLP_UNREG_INP) { - lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + lpfc_printf_vlog(vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, "1436 unreg_rpi SKIP UNREG x%x on " "NPort x%x deferred x%x flg x%x " "Data: x%px\n", @@ -4939,7 +4944,8 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) (!(vport->fc_flag & FC_OFFLINE_MODE))) ndlp->nlp_flag |= NLP_UNREG_INP; - lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + lpfc_printf_vlog(vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, "1433 unreg_rpi UNREG x%x on " "NPort x%x deferred flg x%x " "Data:x%px\n", @@ -5195,8 +5201,10 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) /* For this case we need to cleanup the default rpi * allocated by the firmware. */ - lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, - "0005 rpi:%x DID:%x flg:%x %d map:%x x%px\n", + lpfc_printf_vlog(vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0005 Cleanup Default rpi:x%x DID:x%x flg:x%x " + "ref %d map:x%x ndlp x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); @@ -5233,8 +5241,9 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) */ lpfc_printf_vlog(vport, KERN_WARNING, LOG_NODE, "0940 removed node x%px DID x%x " - " rport not null x%px\n", - ndlp, ndlp->nlp_DID, ndlp->rport); + "rpi %d rport not null x%px\n", + ndlp, ndlp->nlp_DID, ndlp->nlp_rpi, + ndlp->rport); rport = ndlp->rport; rdata = rport->dd_data; rdata->pnode = NULL; @@ -6026,7 +6035,7 @@ lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) ndlp->nlp_flag |= NLP_RPI_REGISTERED; ndlp->nlp_type |= NLP_FABRIC; lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); - lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE | LOG_DISCOVERY, "0004 rpi:%x DID:%x flg:%x %d map:%x x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, kref_read(&ndlp->kref), @@ -6215,12 +6224,12 @@ lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did) INIT_LIST_HEAD(&ndlp->nlp_listp); if (vport->phba->sli_rev == LPFC_SLI_REV4) { ndlp->nlp_rpi = rpi; - lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, - "0007 rpi:%x DID:%x flg:%x refcnt:%d " - "map:%x x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID, - ndlp->nlp_flag, - kref_read(&ndlp->kref), - ndlp->nlp_usg_map, ndlp); + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE | LOG_DISCOVERY, + "0007 Init New ndlp x%px, rpi:x%x DID:%x " + "flg:x%x refcnt:%d map:x%x\n", + ndlp, ndlp->nlp_rpi, ndlp->nlp_DID, + ndlp->nlp_flag, kref_read(&ndlp->kref), + ndlp->nlp_usg_map); ndlp->active_rrqs_xri_bitmap = mempool_alloc(vport->phba->active_rrq_pool, @@ -6449,7 +6458,8 @@ lpfc_fcf_inuse(struct lpfc_hba *phba) goto out; } else if (ndlp->nlp_flag & NLP_RPI_REGISTERED) { ret = 1; - lpfc_printf_log(phba, KERN_INFO, LOG_ELS, + lpfc_printf_log(phba, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, "2624 RPI %x DID %x flag %x " "still logged in\n", ndlp->nlp_rpi, ndlp->nlp_DID, diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index bb84d2a20e76..12885b01fa27 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3053,11 +3053,12 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba) continue; } ndlp->nlp_rpi = rpi; - lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, - "0009 rpi:%x DID:%x " - "flg:%x map:%x x%px\n", ndlp->nlp_rpi, - ndlp->nlp_DID, ndlp->nlp_flag, - ndlp->nlp_usg_map, ndlp); + lpfc_printf_vlog(ndlp->vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0009 Assign RPI x%x to ndlp x%px " + "DID:x%06x flg:x%x map:x%x\n", + ndlp->nlp_rpi, ndlp, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_usg_map); } } lpfc_destroy_vport_work_array(phba, vports); @@ -3453,10 +3454,15 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) list_for_each_entry_safe(ndlp, next_ndlp, &vports[i]->fc_nodes, nlp_listp) { - if (!NLP_CHK_NODE_ACT(ndlp)) - continue; - if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) + if ((!NLP_CHK_NODE_ACT(ndlp)) || + ndlp->nlp_state == NLP_STE_UNUSED_NODE) { + /* Driver must assume RPI is invalid for + * any unused or inactive node. + */ + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; continue; + } + if (ndlp->nlp_type & NLP_FABRIC) { lpfc_disc_state_machine(vports[i], ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); @@ -3472,16 +3478,16 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) * comes back online. */ if (phba->sli_rev == LPFC_SLI_REV4) { - lpfc_printf_vlog(ndlp->vport, - KERN_INFO, LOG_NODE, - "0011 lpfc_offline: " - "ndlp:x%px did %x " - "usgmap:x%x rpi:%x\n", - ndlp, ndlp->nlp_DID, - ndlp->nlp_usg_map, - ndlp->nlp_rpi); - + lpfc_printf_vlog(ndlp->vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0011 Free RPI x%x on " + "ndlp:x%px did x%x " + "usgmap:x%x\n", + ndlp->nlp_rpi, ndlp, + ndlp->nlp_DID, + ndlp->nlp_usg_map); lpfc_sli4_free_rpi(phba, ndlp->nlp_rpi); + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; } lpfc_unreg_rpi(vports[i], ndlp); } diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index f764012ba0a6..24d6779a99f8 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -18131,8 +18131,9 @@ lpfc_sli4_alloc_rpi(struct lpfc_hba *phba) phba->sli4_hba.max_cfg_param.rpi_used++; phba->sli4_hba.rpi_count++; } - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "0001 rpi:%x max:%x lim:%x\n", + lpfc_printf_log(phba, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0001 Allocated rpi:x%x max:x%x lim:x%x\n", (int) rpi, max_rpi, rpi_limit); /* @@ -18192,7 +18193,8 @@ __lpfc_sli4_free_rpi(struct lpfc_hba *phba, int rpi) phba->sli4_hba.rpi_count--; phba->sli4_hba.max_cfg_param.rpi_used--; } else { - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + lpfc_printf_log(phba, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, "2016 rpi %x not inuse\n", rpi); } -- cgit v1.2.3 From 07b8582430370097238b589f4e24da7613ca6dd3 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:53 -0700 Subject: scsi: lpfc: Fix locking on mailbox command completion Symptoms were seen of the driver not having valid data for mailbox commands. After debugging, the following sequence was found: The driver maintains a port-wide pointer of the mailbox command that is currently in execution. Once finished, the port-wide pointer is cleared (done in lpfc_sli4_mq_release()). The next mailbox command issued will set the next pointer and so on. The mailbox response data is only copied if there is a valid port-wide pointer. In the failing case, it was seen that a new mailbox command was being attempted in parallel with the completion. The parallel path was seeing the mailbox no long in use (flag check under lock) and thus set the port pointer. The completion path had cleared the active flag under lock, but had not touched the port pointer. The port pointer is cleared after the lock is released. In this case, the completion path cleared the just-set value by the parallel path. Fix by making the calls that clear mbox state/port pointer while under lock. Also slightly cleaned up the error path. Link: https://lore.kernel.org/r/20190922035906.10977-8-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 24d6779a99f8..313441a3c4cf 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -13165,13 +13165,19 @@ send_current_mbox: phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; /* Setting active mailbox pointer need to be in sync to flag clear */ phba->sli.mbox_active = NULL; + if (bf_get(lpfc_trailer_consumed, mcqe)) + lpfc_sli4_mq_release(phba->sli4_hba.mbx_wq); spin_unlock_irqrestore(&phba->hbalock, iflags); /* Wake up worker thread to post the next pending mailbox command */ lpfc_worker_wake_up(phba); + return workposted; + out_no_mqe_complete: + spin_lock_irqsave(&phba->hbalock, iflags); if (bf_get(lpfc_trailer_consumed, mcqe)) lpfc_sli4_mq_release(phba->sli4_hba.mbx_wq); - return workposted; + spin_unlock_irqrestore(&phba->hbalock, iflags); + return false; } /** -- cgit v1.2.3 From 9df0a0381a600438d19def2c3868c02871e0cd72 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:54 -0700 Subject: scsi: lpfc: Fix GPF on scsi command completion Faults are seen with RIP of lpfc_scsi_cmd_iocb_cmpl(). The failure is when lpfc_update_status is being called as part of the completion. After debugging, it was seen the issue was the shost pointer that the driver derived from the scsi cmd. The crash showed the cmd->device pointer being bogus, which is likely as the scsi devices were offlined prior. The bogus device pointer caused subsequent pointers derived from the location, specifically the vport, to be bogus. Fix by adjusting the calling sequence to pass in the vport rather than having to derive it from the cmd structure. Link: https://lore.kernel.org/r/20190922035906.10977-9-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index fe1097666de4..c2773c07657d 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -134,21 +134,21 @@ lpfc_sli4_set_rsp_sgl_last(struct lpfc_hba *phba, /** * lpfc_update_stats - Update statistical data for the command completion - * @phba: Pointer to HBA object. + * @vport: The virtual port on which this call is executing. * @lpfc_cmd: lpfc scsi command object pointer. * * This function is called when there is a command completion and this * function updates the statistical data for the command completion. **/ static void -lpfc_update_stats(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) +lpfc_update_stats(struct lpfc_vport *vport, struct lpfc_io_buf *lpfc_cmd) { + struct lpfc_hba *phba = vport->phba; struct lpfc_rport_data *rdata; struct lpfc_nodelist *pnode; struct scsi_cmnd *cmd = lpfc_cmd->pCmd; unsigned long flags; - struct Scsi_Host *shost = cmd->device->host; - struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); unsigned long latency; int i; @@ -4004,7 +4004,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, scsi_get_resid(cmd)); } - lpfc_update_stats(phba, lpfc_cmd); + lpfc_update_stats(vport, lpfc_cmd); if (vport->cfg_max_scsicmpl_time && time_after(jiffies, lpfc_cmd->start_time + msecs_to_jiffies(vport->cfg_max_scsicmpl_time))) { -- cgit v1.2.3 From 3f97aed6117c7677eb16756c4ec8b86000fd5822 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:55 -0700 Subject: scsi: lpfc: Fix discovery failures when target device connectivity bounces An issue was seen discovering all SCSI Luns when a target device undergoes link bounce. The driver currently does not qualify the FC4 support on the target. Therefore it will send a SCSI PRLI and an NVMe PRLI. The expectation is that the target will reject the PRLI if it is not supported. If a PRLI times out, the driver will retry. The driver will not proceed with the device until both SCSI and NVMe PRLIs are resolved. In the failure case, the device is FCP only and does not respond to the NVMe PRLI, thus initiating the wait/retry loop in the driver. During that time, a RSCN is received (device bounced) causing the driver to issue a GID_FT. The GID_FT response comes back before the PRLI mess is resolved and it prematurely cancels the PRLI retry logic and leaves the device in a STE_PRLI_ISSUE state. Discovery with the target never completes or resets. Fix by resetting the node state back to STE_NPR_NODE when GID_FT completes, thereby restarting the discovery process for the node. Link: https://lore.kernel.org/r/20190922035906.10977-10-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 144786947b63..f483b3aea22b 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -5444,9 +5444,14 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) /* If we've already received a PLOGI from this NPort * we don't need to try to discover it again. */ - if (ndlp->nlp_flag & NLP_RCV_PLOGI) + if (ndlp->nlp_flag & NLP_RCV_PLOGI && + !(ndlp->nlp_type & + (NLP_FCP_TARGET | NLP_NVME_TARGET))) return NULL; + ndlp->nlp_prev_state = ndlp->nlp_state; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); -- cgit v1.2.3 From 51f8e43ed355d30b3c93293077ecb0c0afac3799 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:56 -0700 Subject: scsi: lpfc: Fix NVMe ABTS in response to receiving an ABTS When the port, running as a nvme target, receives an ABTS, it submits commands to the adapter to Abort i/o outstanding in the adapter. The Abort command formatting routine left a command field set to zero, which instructs the adapter to generate an ABTS on the wire as part of cleaning up the I/O. This is common operation for an initiator, but not for a target. Fix the driver to check whether an ABTS had been received for the I/O, and if so, change the Abort command formatting so that the ABTS generation is disabled (IA=1). No need to ABTS it when the other side already has. Also refactored the code such that there is a single routine being used for nvme or nvmet ABORT requests, and IA is an argument. Link: https://lore.kernel.org/r/20190922035906.10977-11-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_crtn.h | 1 + drivers/scsi/lpfc/lpfc_hw4.h | 1 + drivers/scsi/lpfc/lpfc_nvme.c | 73 ++++++++++++++++++++++++------------------ drivers/scsi/lpfc/lpfc_nvmet.c | 34 ++------------------ 4 files changed, 46 insertions(+), 63 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index b2ad8c750486..6e09fd98a922 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -586,6 +586,7 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd, void lpfc_nvme_cmd_template(void); void lpfc_nvmet_cmd_template(void); void lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn); +void lpfc_nvme_prep_abort_wqe(struct lpfc_iocbq *pwqeq, u16 xritag, u8 opt); extern int lpfc_enable_nvmet_cnt; extern unsigned long long lpfc_enable_nvmet[]; extern int lpfc_no_hba_reset_cnt; diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index bd533475c86a..f70fb7629c82 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -4659,6 +4659,7 @@ struct create_xri_wqe { uint32_t rsvd_12_15[4]; /* word 12-15 */ }; +#define INHIBIT_ABORT 1 #define T_REQUEST_TAG 3 #define T_XRI_TAG 1 diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index a227e36cbdc2..5af944b97c4c 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -195,6 +195,46 @@ lpfc_nvme_cmd_template(void) /* Word 12, 13, 14, 15 - is zero */ } +/** + * lpfc_nvme_prep_abort_wqe - set up 'abort' work queue entry. + * @pwqeq: Pointer to command iocb. + * @xritag: Tag that uniqely identifies the local exchange resource. + * @opt: Option bits - + * bit 0 = inhibit sending abts on the link + * + * This function is called with hbalock held. + **/ +void +lpfc_nvme_prep_abort_wqe(struct lpfc_iocbq *pwqeq, u16 xritag, u8 opt) +{ + union lpfc_wqe128 *wqe = &pwqeq->wqe; + + /* WQEs are reused. Clear stale data and set key fields to + * zero like ia, iaab, iaar, xri_tag, and ctxt_tag. + */ + memset(wqe, 0, sizeof(*wqe)); + + if (opt & INHIBIT_ABORT) + bf_set(abort_cmd_ia, &wqe->abort_cmd, 1); + /* Abort specified xri tag, with the mask deliberately zeroed */ + bf_set(abort_cmd_criteria, &wqe->abort_cmd, T_XRI_TAG); + + bf_set(wqe_cmnd, &wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); + + /* Abort the IO associated with this outstanding exchange ID. */ + wqe->abort_cmd.wqe_com.abort_tag = xritag; + + /* iotag for the wqe completion. */ + bf_set(wqe_reqtag, &wqe->abort_cmd.wqe_com, pwqeq->iotag); + + bf_set(wqe_qosd, &wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_lenloc, &wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); + + bf_set(wqe_cmd_type, &wqe->abort_cmd.wqe_com, OTHER_COMMAND); + bf_set(wqe_wqec, &wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_cqid, &wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); +} + /** * lpfc_nvme_create_queue - * @lpfc_pnvme: Pointer to the driver's nvme instance data @@ -1791,7 +1831,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, struct lpfc_iocbq *abts_buf; struct lpfc_iocbq *nvmereq_wqe; struct lpfc_nvme_fcpreq_priv *freqpriv; - union lpfc_wqe128 *abts_wqe; unsigned long flags; int ret_val; @@ -1912,37 +1951,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* Ready - mark outstanding as aborted by driver. */ nvmereq_wqe->iocb_flag |= LPFC_DRIVER_ABORTED; - /* Complete prepping the abort wqe and issue to the FW. */ - abts_wqe = &abts_buf->wqe; - - /* WQEs are reused. Clear stale data and set key fields to - * zero like ia, iaab, iaar, xri_tag, and ctxt_tag. - */ - memset(abts_wqe, 0, sizeof(*abts_wqe)); - bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); - - /* word 7 */ - bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); - bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com, - nvmereq_wqe->iocb.ulpClass); - - /* word 8 - tell the FW to abort the IO associated with this - * outstanding exchange ID. - */ - abts_wqe->abort_cmd.wqe_com.abort_tag = nvmereq_wqe->sli4_xritag; - - /* word 9 - this is the iotag for the abts_wqe completion. */ - bf_set(wqe_reqtag, &abts_wqe->abort_cmd.wqe_com, - abts_buf->iotag); - - /* word 10 */ - bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); - bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); - - /* word 11 */ - bf_set(wqe_cmd_type, &abts_wqe->abort_cmd.wqe_com, OTHER_COMMAND); - bf_set(wqe_wqec, &abts_wqe->abort_cmd.wqe_com, 1); - bf_set(wqe_cqid, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + lpfc_nvme_prep_abort_wqe(abts_buf, nvmereq_wqe->sli4_xritag, 0); /* ABTS WQE must go to the same WQ as the WQE to be aborted */ abts_buf->iocb_flag |= LPFC_IO_NVME; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 9884228800a5..121dbdcbb583 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -3239,9 +3239,9 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, { struct lpfc_nvmet_tgtport *tgtp; struct lpfc_iocbq *abts_wqeq; - union lpfc_wqe128 *abts_wqe; struct lpfc_nodelist *ndlp; unsigned long flags; + u8 opt; int rc; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; @@ -3280,8 +3280,8 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, return 0; } abts_wqeq = ctxp->abort_wqeq; - abts_wqe = &abts_wqeq->wqe; ctxp->state = LPFC_NVMET_STE_ABORT; + opt = (ctxp->flag & LPFC_NVMET_ABTS_RCV) ? INHIBIT_ABORT : 0; spin_unlock_irqrestore(&ctxp->ctxlock, flags); /* Announce entry to new IO submit field. */ @@ -3327,35 +3327,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, /* Ready - mark outstanding as aborted by driver. */ abts_wqeq->iocb_flag |= LPFC_DRIVER_ABORTED; - /* WQEs are reused. Clear stale data and set key fields to - * zero like ia, iaab, iaar, xri_tag, and ctxt_tag. - */ - memset(abts_wqe, 0, sizeof(*abts_wqe)); - - /* word 3 */ - bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); - - /* word 7 */ - bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); - bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); - - /* word 8 - tell the FW to abort the IO associated with this - * outstanding exchange ID. - */ - abts_wqe->abort_cmd.wqe_com.abort_tag = ctxp->wqeq->sli4_xritag; - - /* word 9 - this is the iotag for the abts_wqe completion. */ - bf_set(wqe_reqtag, &abts_wqe->abort_cmd.wqe_com, - abts_wqeq->iotag); - - /* word 10 */ - bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); - bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); - - /* word 11 */ - bf_set(wqe_cmd_type, &abts_wqe->abort_cmd.wqe_com, OTHER_COMMAND); - bf_set(wqe_wqec, &abts_wqe->abort_cmd.wqe_com, 1); - bf_set(wqe_cqid, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + lpfc_nvme_prep_abort_wqe(abts_wqeq, ctxp->wqeq->sli4_xritag, opt); /* ABTS WQE must go to the same WQ as the WQE to be aborted */ abts_wqeq->hba_wqidx = ctxp->wqeq->hba_wqidx; -- cgit v1.2.3 From 43bfea1bffb6b01089c2fe483ede1b036e166579 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:57 -0700 Subject: scsi: lpfc: Fix coverity errors on NULL pointer checks Coverity flagged several scenarios where checking of null pointer values wasn't consistent. Fix the code to that be consistent on checking. Link: https://lore.kernel.org/r/20190922035906.10977-12-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 5 +++++ drivers/scsi/lpfc/lpfc_nvmet.c | 19 +++++++++++++++---- drivers/scsi/lpfc/lpfc_scsi.c | 2 +- drivers/scsi/lpfc/lpfc_sli.c | 9 ++++++--- 4 files changed, 27 insertions(+), 8 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index d5303994bfd6..55ab37572e92 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -4291,6 +4291,11 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, irsp = &rspiocb->iocb; + if (!vport) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + "3177 ELS response failed\n"); + goto out; + } if (cmdiocb->context_un.mbox) mbox = cmdiocb->context_un.mbox; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 121dbdcbb583..c9481a618b85 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1958,12 +1958,10 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, uint32_t *payload; uint32_t size, oxid, sid, rc; - fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); - oxid = be16_to_cpu(fc_hdr->fh_ox_id); - if (!phba->targetport) { + if (!nvmebuf || !phba->targetport) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6154 LS Drop IO x%x\n", oxid); + "6154 LS Drop IO\n"); oxid = 0; size = 0; sid = 0; @@ -1971,6 +1969,9 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, goto dropit; } + fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); + oxid = be16_to_cpu(fc_hdr->fh_ox_id); + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; payload = (uint32_t *)(nvmebuf->dbuf.virt); size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl); @@ -2401,6 +2402,11 @@ lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, d_buf = piocb->context2; nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf); + if (!nvmebuf) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "3015 LS Drop IO\n"); + return; + } if (phba->nvmet_support == 0) { lpfc_in_buf_free(phba, &nvmebuf->dbuf); return; @@ -2429,6 +2435,11 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint64_t isr_timestamp, uint8_t cqflag) { + if (!nvmebuf) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "3167 NVMET FCP Drop IO\n"); + return; + } if (phba->nvmet_support == 0) { lpfc_rq_buf_free(phba, &nvmebuf->hbuf); return; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index c2773c07657d..f06f63e58596 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3814,7 +3814,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, /* Sanity check on return of outstanding command */ cmd = lpfc_cmd->pCmd; - if (!cmd) { + if (!cmd || !phba) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "2621 IO completion: Not an active IO\n"); spin_unlock(&lpfc_cmd->buf_lock); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 313441a3c4cf..939efee6b5dd 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2674,7 +2674,8 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, "(%d):0323 Unknown Mailbox command " "x%x (x%x/x%x) Cmpl\n", - pmb->vport ? pmb->vport->vpi : 0, + pmb->vport ? pmb->vport->vpi : + LPFC_VPORT_UNKNOWN, pmbox->mbxCommand, lpfc_sli_config_mbox_subsys_get(phba, pmb), @@ -2695,7 +2696,8 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba) "(%d):0305 Mbox cmd cmpl " "error - RETRYing Data: x%x " "(x%x/x%x) x%x x%x x%x\n", - pmb->vport ? pmb->vport->vpi : 0, + pmb->vport ? pmb->vport->vpi : + LPFC_VPORT_UNKNOWN, pmbox->mbxCommand, lpfc_sli_config_mbox_subsys_get(phba, pmb), @@ -2703,7 +2705,8 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba) pmb), pmbox->mbxStatus, pmbox->un.varWords[0], - pmb->vport->port_state); + pmb->vport ? pmb->vport->port_state : + LPFC_VPORT_UNKNOWN); pmbox->mbxStatus = 0; pmbox->mbxOwner = OWN_HOST; rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); -- cgit v1.2.3 From 24c7c0a6d3de68b6e15532d18749e561d260c160 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:58 -0700 Subject: scsi: lpfc: Fix host hang at boot or slow boot Scenarios were seen where a host hung when the system booted or the host was very slow in booting. The link would not come up and no luns were visible to the host. After investigation, this was found to be due to the introduction of a new ACQE that adapter may generate to report a adapter hw warning. The ACQE was delivered to the driver very early in adapter initialization, when the driver did not expect command completion. As part of handling this unexpected interrupt the an EQEs are consumed and discarded and the EQ rearmed. The issue is the CQ that cause the EQE and thus the interrupt was not processed and the CQ was left unarmed. Meaning it would no longer generate a new interrupt condition. Subsequent mailbox commands used to initialize the adapter use the same CQ, and as there was no completion interrupt generated, the driver never saw the mailbox commands complete and it would wait long command timeouts. Fix by having the early flush routine also process the related CQ and rearm the CQ. Link: https://lore.kernel.org/r/20190922035906.10977-13-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 939efee6b5dd..412cd8c56d90 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -87,6 +87,10 @@ static void lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe); static bool lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba); static bool lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba); +static struct lpfc_cqe *lpfc_sli4_cq_get(struct lpfc_queue *q); +static void __lpfc_sli4_consume_cqe(struct lpfc_hba *phba, + struct lpfc_queue *cq, + struct lpfc_cqe *cqe); static IOCB_t * lpfc_get_iocb_from_iocbq(struct lpfc_iocbq *iocbq) @@ -467,21 +471,47 @@ __lpfc_sli4_consume_eqe(struct lpfc_hba *phba, struct lpfc_queue *eq, } static void -lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq) +lpfc_sli4_eqcq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq) { - struct lpfc_eqe *eqe; - uint32_t count = 0; + struct lpfc_eqe *eqe = NULL; + u32 eq_count = 0, cq_count = 0; + struct lpfc_cqe *cqe = NULL; + struct lpfc_queue *cq = NULL, *childq = NULL; + int cqid = 0; /* walk all the EQ entries and drop on the floor */ eqe = lpfc_sli4_eq_get(eq); while (eqe) { + /* Get the reference to the corresponding CQ */ + cqid = bf_get_le32(lpfc_eqe_resource_id, eqe); + cq = NULL; + + list_for_each_entry(childq, &eq->child_list, list) { + if (childq->queue_id == cqid) { + cq = childq; + break; + } + } + /* If CQ is valid, iterate through it and drop all the CQEs */ + if (cq) { + cqe = lpfc_sli4_cq_get(cq); + while (cqe) { + __lpfc_sli4_consume_cqe(phba, cq, cqe); + cq_count++; + cqe = lpfc_sli4_cq_get(cq); + } + /* Clear and re-arm the CQ */ + phba->sli4_hba.sli4_write_cq_db(phba, cq, cq_count, + LPFC_QUEUE_REARM); + cq_count = 0; + } __lpfc_sli4_consume_eqe(phba, eq, eqe); - count++; + eq_count++; eqe = lpfc_sli4_eq_get(eq); } /* Clear and re-arm the EQ */ - phba->sli4_hba.sli4_write_eq_db(phba, eq, count, LPFC_QUEUE_REARM); + phba->sli4_hba.sli4_write_eq_db(phba, eq, eq_count, LPFC_QUEUE_REARM); } static int @@ -14236,7 +14266,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) spin_lock_irqsave(&phba->hbalock, iflag); if (phba->link_state < LPFC_LINK_DOWN) /* Flush, clear interrupt, and rearm the EQ */ - lpfc_sli4_eq_flush(phba, fpeq); + lpfc_sli4_eqcq_flush(phba, fpeq); spin_unlock_irqrestore(&phba->hbalock, iflag); return IRQ_NONE; } -- cgit v1.2.3 From 15498dc1a55b7aaea4b51ff03e3ff0f662e73f44 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:58:59 -0700 Subject: scsi: lpfc: Fix list corruption in lpfc_sli_get_iocbq After study, it was determined there was a double free of a CT iocb during execution of lpfc_offline_prep and lpfc_offline. The prep routine issued an abort for some CT iocbs, but the aborts did not complete fast enough for a subsequent routine that waits for completion. Thus the driver proceeded to lpfc_offline, which releases any pending iocbs. Unfortunately, the completions for the aborts were then received which re-released the ct iocbs. Turns out the issue for why the aborts didn't complete fast enough was not their time on the wire/in the adapter. It was the lpfc_work_done routine, which requires the adapter state to be UP before it calls lpfc_sli_handle_slow_ring_event() to process the completions. The issue is the prep routine takes the link down as part of it's processing. To fix, the following was performed: - Prevent the offline routine from releasing iocbs that have had aborts issued on them. Defer to the abort completions. Also means the driver fully waits for the completions. Given this change, the recognition of "driver-generated" status which then releases the iocb is no longer valid. As such, the change made in the commit 296012285c90 is reverted. As recognition of "driver-generated" status is no longer valid, this patch reverts the changes made in commit 296012285c90 ("scsi: lpfc: Fix leak of ELS completions on adapter reset") - Modify lpfc_work_done to allow slow path completions so that the abort completions aren't ignored. - Updated the fdmi path to recognize a CT request that fails due to the port being unusable. This stops FDMI retries. FDMI will be restarted on next link up. Link: https://lore.kernel.org/r/20190922035906.10977-14-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 6 ++++++ drivers/scsi/lpfc/lpfc_els.c | 3 +++ drivers/scsi/lpfc/lpfc_hbadisc.c | 5 ++++- drivers/scsi/lpfc/lpfc_sli.c | 3 --- 4 files changed, 13 insertions(+), 4 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 25e86706e207..f883fac2d2b1 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1868,6 +1868,12 @@ lpfc_cmpl_ct_disc_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if (irsp->ulpStatus == IOSTAT_LOCAL_REJECT) { switch ((irsp->un.ulpWord[4] & IOERR_PARAM_MASK)) { case IOERR_SLI_ABORTED: + case IOERR_SLI_DOWN: + /* Driver aborted this IO. No retry as error + * is likely Offline->Online or some adapter + * error. Recovery will try again. + */ + break; case IOERR_ABORT_IN_PROGRESS: case IOERR_SEQUENCE_TIMEOUT: case IOERR_ILLEGAL_FRAME: diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 55ab37572e92..bd8109b2a083 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -8019,6 +8019,9 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) if (piocb->vport != vport) continue; + if (piocb->iocb_flag & LPFC_DRIVER_ABORTED) + continue; + /* On the ELS ring we can have ELS_REQUESTs or * GEN_REQUESTs waiting for a response. */ diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index f483b3aea22b..808ad666bb1b 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -700,7 +700,10 @@ lpfc_work_done(struct lpfc_hba *phba) if (!(phba->hba_flag & HBA_SP_QUEUE_EVT)) set_bit(LPFC_DATA_READY, &phba->data_flags); } else { - if (phba->link_state >= LPFC_LINK_UP || + /* Driver could have abort request completed in queue + * when link goes down. Allow for this transition. + */ + if (phba->link_state >= LPFC_LINK_DOWN || phba->link_flag & LS_MDS_LOOPBACK) { pring->flag &= ~LPFC_DEFERRED_RING_EVENT; lpfc_sli_handle_slow_ring_event(phba, pring, diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 412cd8c56d90..ff261c0c738a 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -11090,9 +11090,6 @@ lpfc_sli_abort_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, irsp->ulpStatus, irsp->un.ulpWord[4]); spin_unlock_irq(&phba->hbalock); - if (irsp->ulpStatus == IOSTAT_LOCAL_REJECT && - irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) - lpfc_sli_release_iocbq(phba, abort_iocb); } release_iocb: lpfc_sli_release_iocbq(phba, cmdiocb); -- cgit v1.2.3 From d38b4a527fe898f859f74a3a43d4308f48ac7855 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 21 Sep 2019 20:59:00 -0700 Subject: scsi: lpfc: Fix spinlock_irq issues in lpfc_els_flush_cmd() While reviewing the CT behavior, issues with spinlock_irq were seen. The driver should be using spinlock_irqsave/irqrestore in the els flush routine. Changed to spinlock_irqsave/irqrestore. Link: https://lore.kernel.org/r/20190922035906.10977-15-jsmart2021@gmail.com Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index bd8109b2a083..da90c7bf2287 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -7991,20 +7991,22 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) struct lpfc_sli_ring *pring; struct lpfc_iocbq *tmp_iocb, *piocb; IOCB_t *cmd = NULL; + unsigned long iflags = 0; lpfc_fabric_abort_vport(vport); + /* * For SLI3, only the hbalock is required. But SLI4 needs to coordinate * with the ring insert operation. Because lpfc_sli_issue_abort_iotag * ultimately grabs the ring_lock, the driver must splice the list into * a working list and release the locks before calli