diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-09 16:53:47 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-09 16:53:47 -0800 |
commit | 92fff53b7191cae566be9ca6752069426c7f8241 (patch) | |
tree | 019396be4719ad3969d0395cfa0a90860be75f4a /drivers/scsi/lpfc/lpfc_init.c | |
parent | a50243b1ddcdd766d0d17fbfeeb1a22e62fdc461 (diff) | |
parent | 26af1a368e40618d67956b1f883fbcfec292c5d8 (diff) |
Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
Pull SCSI updates from James Bottomley:
"This is mostly update of the usual drivers: arcmsr, qla2xxx, lpfc,
hisi_sas, target/iscsi and target/core.
Additionally Christoph refactored gdth as part of the dma changes. The
major mid-layer change this time is the removal of bidi commands and
with them the whole of the osd/exofs driver and filesystem. This is a
major simplification for block and mq in particular"
* tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi: (240 commits)
scsi: cxgb4i: validate tcp sequence number only if chip version <= T5
scsi: cxgb4i: get pf number from lldi->pf
scsi: core: replace GFP_ATOMIC with GFP_KERNEL in scsi_scan.c
scsi: mpt3sas: Add missing breaks in switch statements
scsi: aacraid: Fix missing break in switch statement
scsi: kill command serial number
scsi: csiostor: drop serial_number usage
scsi: mvumi: use request tag instead of serial_number
scsi: dpt_i2o: remove serial number usage
scsi: st: osst: Remove negative constant left-shifts
scsi: ufs-bsg: Allow reading descriptors
scsi: ufs: Allow reading descriptor via raw upiu
scsi: ufs-bsg: Change the calling convention for write descriptor
scsi: ufs: Remove unused device quirks
Revert "scsi: ufs: disable vccq if it's not needed by UFS device"
scsi: megaraid_sas: Remove a bunch of set but not used variables
scsi: clean obsolete return values of eh_timed_out
scsi: sd: Optimal I/O size should be a multiple of physical block size
scsi: MAINTAINERS: SCSI initiator and target tweaks
scsi: fcoe: make use of fip_mode enum complete
...
Diffstat (limited to 'drivers/scsi/lpfc/lpfc_init.c')
-rw-r--r-- | drivers/scsi/lpfc/lpfc_init.c | 2274 |
1 files changed, 1271 insertions, 1003 deletions
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index e1129260ed18..3b5873f6751e 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term * + * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term * * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * @@ -37,6 +37,7 @@ #include <linux/miscdevice.h> #include <linux/percpu.h> #include <linux/msi.h> +#include <linux/irq.h> #include <linux/bitops.h> #include <scsi/scsi.h> @@ -71,7 +72,6 @@ unsigned long _dump_buf_dif_order; spinlock_t _dump_buf_lock; /* Used when mapping IRQ vectors in a driver centric manner */ -uint16_t *lpfc_used_cpu; uint32_t lpfc_present_cpu; static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *); @@ -93,6 +93,8 @@ static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *); static void lpfc_sli4_disable_intr(struct lpfc_hba *); static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t); static void lpfc_sli4_oas_verify(struct lpfc_hba *phba); +static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t); +static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int); static struct scsi_transport_template *lpfc_transport_template = NULL; static struct scsi_transport_template *lpfc_vport_transport_template = NULL; @@ -1037,14 +1039,14 @@ lpfc_hba_down_post_s3(struct lpfc_hba *phba) static int lpfc_hba_down_post_s4(struct lpfc_hba *phba) { - struct lpfc_scsi_buf *psb, *psb_next; + struct lpfc_io_buf *psb, *psb_next; struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next; + struct lpfc_sli4_hdw_queue *qp; LIST_HEAD(aborts); LIST_HEAD(nvme_aborts); LIST_HEAD(nvmet_aborts); - unsigned long iflag = 0; struct lpfc_sglq *sglq_entry = NULL; - int cnt; + int cnt, idx; lpfc_sli_hbqbuf_free_all(phba); @@ -1071,55 +1073,65 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) spin_unlock(&phba->sli4_hba.sgl_list_lock); - /* abts_scsi_buf_list_lock required because worker thread uses this + + /* abts_xxxx_buf_list_lock required because worker thread uses this * list. */ - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { - spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock); - list_splice_init(&phba->sli4_hba.lpfc_abts_scsi_buf_list, - &aborts); - spin_unlock(&phba->sli4_hba.abts_scsi_buf_list_lock); - } - - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); - list_splice_init(&phba->sli4_hba.lpfc_abts_nvme_buf_list, - &nvme_aborts); - list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list, - &nvmet_aborts); - spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); - } + cnt = 0; + for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { + qp = &phba->sli4_hba.hdwq[idx]; - spin_unlock_irq(&phba->hbalock); - - list_for_each_entry_safe(psb, psb_next, &aborts, list) { - psb->pCmd = NULL; - psb->status = IOSTAT_SUCCESS; - } - spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag); - list_splice(&aborts, &phba->lpfc_scsi_buf_list_put); - spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag); + spin_lock(&qp->abts_scsi_buf_list_lock); + list_splice_init(&qp->lpfc_abts_scsi_buf_list, + &aborts); - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - cnt = 0; - list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) { + list_for_each_entry_safe(psb, psb_next, &aborts, list) { psb->pCmd = NULL; psb->status = IOSTAT_SUCCESS; cnt++; } - spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag); - phba->put_nvme_bufs += cnt; - list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put); - spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag); + spin_lock(&qp->io_buf_list_put_lock); + list_splice_init(&aborts, &qp->lpfc_io_buf_list_put); + qp->put_io_bufs += qp->abts_scsi_io_bufs; + qp->abts_scsi_io_bufs = 0; + spin_unlock(&qp->io_buf_list_put_lock); + spin_unlock(&qp->abts_scsi_buf_list_lock); + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + spin_lock(&qp->abts_nvme_buf_list_lock); + list_splice_init(&qp->lpfc_abts_nvme_buf_list, + &nvme_aborts); + list_for_each_entry_safe(psb, psb_next, &nvme_aborts, + list) { + psb->pCmd = NULL; + psb->status = IOSTAT_SUCCESS; + cnt++; + } + spin_lock(&qp->io_buf_list_put_lock); + qp->put_io_bufs += qp->abts_nvme_io_bufs; + qp->abts_nvme_io_bufs = 0; + list_splice_init(&nvme_aborts, + &qp->lpfc_io_buf_list_put); + spin_unlock(&qp->io_buf_list_put_lock); + spin_unlock(&qp->abts_nvme_buf_list_lock); + + } + } + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + &nvmet_aborts); + spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) { ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP); lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); } } + spin_unlock_irq(&phba->hbalock); lpfc_sli4_free_sp_events(phba); - return 0; + return cnt; } /** @@ -1239,6 +1251,96 @@ lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq) return; } +static void +lpfc_hb_eq_delay_work(struct work_struct *work) +{ + struct lpfc_hba *phba = container_of(to_delayed_work(work), + struct lpfc_hba, eq_delay_work); + struct lpfc_eq_intr_info *eqi, *eqi_new; + struct lpfc_queue *eq, *eq_next; + unsigned char *eqcnt = NULL; + uint32_t usdelay; + int i; + + if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING) + return; + + if (phba->link_state == LPFC_HBA_ERROR || + phba->pport->fc_flag & FC_OFFLINE_MODE) + goto requeue; + + eqcnt = kcalloc(num_possible_cpus(), sizeof(unsigned char), + GFP_KERNEL); + if (!eqcnt) + goto requeue; + + for (i = 0; i < phba->cfg_irq_chann; i++) { + eq = phba->sli4_hba.hdwq[i].hba_eq; + if (eq && eqcnt[eq->last_cpu] < 2) + eqcnt[eq->last_cpu]++; + continue; + } + + for_each_present_cpu(i) { + if (phba->cfg_irq_chann > 1 && eqcnt[i] < 2) + continue; + + eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i); + + usdelay = (eqi->icnt / LPFC_IMAX_THRESHOLD) * + LPFC_EQ_DELAY_STEP; + if (usdelay > LPFC_MAX_AUTO_EQ_DELAY) + usdelay = LPFC_MAX_AUTO_EQ_DELAY; + + eqi->icnt = 0; + + list_for_each_entry_safe(eq, eq_next, &eqi->list, cpu_list) { + if (eq->last_cpu != i) { + eqi_new = per_cpu_ptr(phba->sli4_hba.eq_info, + eq->last_cpu); + list_move_tail(&eq->cpu_list, &eqi_new->list); + continue; + } + if (usdelay != eq->q_mode) + lpfc_modify_hba_eq_delay(phba, eq->hdwq, 1, + usdelay); + } + } + + kfree(eqcnt); + +requeue: + queue_delayed_work(phba->wq, &phba->eq_delay_work, + msecs_to_jiffies(LPFC_EQ_DELAY_MSECS)); +} + +/** + * lpfc_hb_mxp_handler - Multi-XRI pools handler to adjust XRI distribution + * @phba: pointer to lpfc hba data structure. + * + * For each heartbeat, this routine does some heuristic methods to adjust + * XRI distribution. The goal is to fully utilize free XRIs. + **/ +static void lpfc_hb_mxp_handler(struct lpfc_hba *phba) +{ + u32 i; + u32 hwq_count; + + hwq_count = phba->cfg_hdw_queue; + for (i = 0; i < hwq_count; i++) { + /* Adjust XRIs in private pool */ + lpfc_adjust_pvt_pool_count(phba, i); + + /* Adjust high watermark */ + lpfc_adjust_high_watermark(phba, i); + +#ifdef LPFC_MXP_STAT + /* Snapshot pbl, pvt and busy count */ + lpfc_snapshot_mxp(phba, i); +#endif + } +} + /** * lpfc_hb_timeout_handler - The HBA-timer timeout handler * @phba: pointer to lpfc hba data structure. @@ -1264,16 +1366,11 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba) int retval, i; struct lpfc_sli *psli = &phba->sli; LIST_HEAD(completions); - struct lpfc_queue *qp; - unsigned long time_elapsed; - uint32_t tick_cqe, max_cqe, val; - uint64_t tot, data1, data2, data3; - struct lpfc_nvmet_tgtport *tgtp; - struct lpfc_register reg_data; - struct nvme_fc_local_port *localport; - struct lpfc_nvme_lport *lport; - struct lpfc_nvme_ctrl_stat *cstat; - void __iomem *eqdreg = phba->sli4_hba.u.if_type2.EQDregaddr; + + if (phba->cfg_xri_rebalancing) { + /* Multi-XRI pools handler */ + lpfc_hb_mxp_handler(phba); + } vports = lpfc_create_vport_work_array(phba); if (vports != NULL) @@ -1288,107 +1385,6 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba) (phba->pport->fc_flag & FC_OFFLINE_MODE)) return; - if (phba->cfg_auto_imax) { - if (!phba->last_eqdelay_time) { - phba->last_eqdelay_time = jiffies; - goto skip_eqdelay; - } - time_elapsed = jiffies - phba->last_eqdelay_time; - phba->last_eqdelay_time = jiffies; - - tot = 0xffff; - /* Check outstanding IO count */ - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - if (phba->nvmet_support) { - tgtp = phba->targetport->private; - /* Calculate outstanding IOs */ - tot = atomic_read(&tgtp->rcv_fcp_cmd_drop); - tot += atomic_read(&tgtp->xmt_fcp_release); - tot = atomic_read(&tgtp->rcv_fcp_cmd_in) - tot; - } else { - localport = phba->pport->localport; - if (!localport || !localport->private) - goto skip_eqdelay; - lport = (struct lpfc_nvme_lport *) - localport->private; - tot = 0; - for (i = 0; - i < phba->cfg_nvme_io_channel; i++) { - cstat = &lport->cstat[i]; - data1 = atomic_read( - &cstat->fc4NvmeInputRequests); - data2 = atomic_read( - &cstat->fc4NvmeOutputRequests); - data3 = atomic_read( - &cstat->fc4NvmeControlRequests); - tot += (data1 + data2 + data3); - tot -= atomic_read( - &cstat->fc4NvmeIoCmpls); - } - } - } - - /* Interrupts per sec per EQ */ - val = phba->cfg_fcp_imax / phba->io_channel_irqs; - tick_cqe = val / CONFIG_HZ; /* Per tick per EQ */ - - /* Assume 1 CQE/ISR, calc max CQEs allowed for time duration */ - max_cqe = time_elapsed * tick_cqe; - - for (i = 0; i < phba->io_channel_irqs; i++) { - /* Fast-path EQ */ - qp = phba->sli4_hba.hba_eq[i]; - if (!qp) - continue; - - /* Use no EQ delay if we don't have many outstanding - * IOs, or if we are only processing 1 CQE/ISR or less. - * Otherwise, assume we can process up to lpfc_fcp_imax - * interrupts per HBA. - */ - if (tot < LPFC_NODELAY_MAX_IO || - qp->EQ_cqe_cnt <= max_cqe) - val = 0; - else - val = phba->cfg_fcp_imax; - - if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) { - /* Use EQ Delay Register method */ - - /* Convert for EQ Delay register */ - if (val) { - /* First, interrupts per sec per EQ */ - val = phba->cfg_fcp_imax / - phba->io_channel_irqs; - - /* us delay between each interrupt */ - val = LPFC_SEC_TO_USEC / val; - } - if (val != qp->q_mode) { - reg_data.word0 = 0; - bf_set(lpfc_sliport_eqdelay_id, - ®_data, qp->queue_id); - bf_set(lpfc_sliport_eqdelay_delay, - ®_data, val); - writel(reg_data.word0, eqdreg); - } - } else { - /* Use mbox command method */ - if (val != qp->q_mode) - lpfc_modify_hba_eq_delay(phba, i, - 1, val); - } - - /* - * val is cfg_fcp_imax or 0 for mbox delay or us delay - * between interrupts for EQDR. - */ - qp->q_mode = val; - qp->EQ_cqe_cnt = 0; - } - } - -skip_eqdelay: spin_lock_irq(&phba->pport->work_port_lock); if (time_after(phba->last_completion_time + @@ -2943,7 +2939,9 @@ lpfc_sli4_stop_fcf_redisc_wait_timer(struct lpfc_hba *phba) void lpfc_stop_hba_timers(struct lpfc_hba *phba) { - lpfc_stop_vport_timers(phba->pport); + if (phba->pport) + lpfc_stop_vport_timers(phba->pport); + cancel_delayed_work_sync(&phba->eq_delay_work); del_timer_sync(&phba->sli.mbox_tmo); del_timer_sync(&phba->fabric_block_timer); del_timer_sync(&phba->eratt_poll); @@ -3071,6 +3069,242 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba) } /** + * lpfc_create_expedite_pool - create expedite pool + * @phba: pointer to lpfc hba data structure. + * + * This routine moves a batch of XRIs from lpfc_io_buf_list_put of HWQ 0 + * to expedite pool. Mark them as expedite. + **/ +void lpfc_create_expedite_pool(struct lpfc_hba *phba) +{ + struct lpfc_sli4_hdw_queue *qp; + struct lpfc_io_buf *lpfc_ncmd; + struct lpfc_io_buf *lpfc_ncmd_next; + struct lpfc_epd_pool *epd_pool; + unsigned long iflag; + + epd_pool = &phba->epd_pool; + qp = &phba->sli4_hba.hdwq[0]; + + spin_lock_init(&epd_pool->lock); + spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag); + spin_lock(&epd_pool->lock); + INIT_LIST_HEAD(&epd_pool->list); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &qp->lpfc_io_buf_list_put, list) { + list_move_tail(&lpfc_ncmd->list, &epd_pool->list); + lpfc_ncmd->expedite = true; + qp->put_io_bufs--; + epd_pool->count++; + if (epd_pool->count >= XRI_BATCH) + break; + } + spin_unlock(&epd_pool->lock); + spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag); +} + +/** + * lpfc_destroy_expedite_pool - destroy expedite pool + * @phba: pointer to lpfc hba data structure. + * + * This routine returns XRIs from expedite pool to lpfc_io_buf_list_put + * of HWQ 0. Clear the mark. + **/ +void lpfc_destroy_expedite_pool(struct lpfc_hba *phba) +{ + struct lpfc_sli4_hdw_queue *qp; + struct lpfc_io_buf *lpfc_ncmd; + struct lpfc_io_buf *lpfc_ncmd_next; + struct lpfc_epd_pool *epd_pool; + unsigned long iflag; + + epd_pool = &phba->epd_pool; + qp = &phba->sli4_hba.hdwq[0]; + + spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag); + spin_lock(&epd_pool->lock); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &epd_pool->list, list) { + list_move_tail(&lpfc_ncmd->list, + &qp->lpfc_io_buf_list_put); + lpfc_ncmd->flags = false; + qp->put_io_bufs++; + epd_pool->count--; + } + spin_unlock(&epd_pool->lock); + spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag); +} + +/** + * lpfc_create_multixri_pools - create multi-XRI pools + * @phba: pointer to lpfc hba data structure. + * + * This routine initialize public, private per HWQ. Then, move XRIs from + * lpfc_io_buf_list_put to public pool. High and low watermark are also + * Initialized. + **/ +void lpfc_create_multixri_pools(struct lpfc_hba *phba) +{ + u32 i, j; + u32 hwq_count; + u32 count_per_hwq; + struct lpfc_io_buf *lpfc_ncmd; + struct lpfc_io_buf *lpfc_ncmd_next; + unsigned long iflag; + struct lpfc_sli4_hdw_queue *qp; + struct lpfc_multixri_pool *multixri_pool; + struct lpfc_pbl_pool *pbl_pool; + struct lpfc_pvt_pool *pvt_pool; + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "1234 num_hdw_queue=%d num_present_cpu=%d common_xri_cnt=%d\n", + phba->cfg_hdw_queue, phba->sli4_hba.num_present_cpu, + phba->sli4_hba.io_xri_cnt); + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + lpfc_create_expedite_pool(phba); + + hwq_count = phba->cfg_hdw_queue; + count_per_hwq = phba->sli4_hba.io_xri_cnt / hwq_count; + + for (i = 0; i < hwq_count; i++) { + multixri_pool = kzalloc(sizeof(*multixri_pool), GFP_KERNEL); + + if (!multixri_pool) { + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "1238 Failed to allocate memory for " + "multixri_pool\n"); + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + lpfc_destroy_expedite_pool(phba); + + j = 0; + while (j < i) { + qp = &phba->sli4_hba.hdwq[j]; + kfree(qp->p_multixri_pool); + j++; + } + phba->cfg_xri_rebalancing = 0; + return; + } + + qp = &phba->sli4_hba.hdwq[i]; + qp->p_multixri_pool = multixri_pool; + + multixri_pool->xri_limit = count_per_hwq; + multixri_pool->rrb_next_hwqid = i; + + /* Deal with public free xri pool */ + pbl_pool = &multixri_pool->pbl_pool; + spin_lock_init(&pbl_pool->lock); + spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag); + spin_lock(&pbl_pool->lock); + INIT_LIST_HEAD(&pbl_pool->list); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &qp->lpfc_io_buf_list_put, list) { + list_move_tail(&lpfc_ncmd->list, &pbl_pool->list); + qp->put_io_bufs--; + pbl_pool->count++; + } + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "1235 Moved %d buffers from PUT list over to pbl_pool[%d]\n", + pbl_pool->count, i); + spin_unlock(&pbl_pool->lock); + spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag); + + /* Deal with private free xri pool */ + pvt_pool = &multixri_pool->pvt_pool; + pvt_pool->high_watermark = multixri_pool->xri_limit / 2; + pvt_pool->low_watermark = XRI_BATCH; + spin_lock_init(&pvt_pool->lock); + spin_lock_irqsave(&pvt_pool->lock, iflag); + INIT_LIST_HEAD(&pvt_pool->list); + pvt_pool->count = 0; + spin_unlock_irqrestore(&pvt_pool->lock, iflag); + } +} + +/** + * lpfc_destroy_multixri_pools - destroy multi-XRI pools + * @phba: pointer to lpfc hba data structure. + * + * This routine returns XRIs from public/private to lpfc_io_buf_list_put. + **/ +void lpfc_destroy_multixri_pools(struct lpfc_hba *phba) +{ + u32 i; + u32 hwq_count; + struct lpfc_io_buf *lpfc_ncmd; + struct lpfc_io_buf *lpfc_ncmd_next; + unsigned long iflag; + struct lpfc_sli4_hdw_queue *qp; + struct lpfc_multixri_pool *multixri_pool; + struct lpfc_pbl_pool *pbl_pool; + struct lpfc_pvt_pool *pvt_pool; + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + lpfc_destroy_expedite_pool(phba); + + hwq_count = phba->cfg_hdw_queue; + + for (i = 0; i < hwq_count; i++) { + qp = &phba->sli4_hba.hdwq[i]; + multixri_pool = qp->p_multixri_pool; + if (!multixri_pool) + continue; + + qp->p_multixri_pool = NULL; + + spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag); + + /* Deal with public free xri pool */ + pbl_pool = &multixri_pool->pbl_pool; + spin_lock(&pbl_pool->lock); + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "1236 Moving %d buffers from pbl_pool[%d] TO PUT list\n", + pbl_pool->count, i); + + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &pbl_pool->list, list) { + list_move_tail(&lpfc_ncmd->list, + &qp->lpfc_io_buf_list_put); + qp->put_io_bufs++; + pbl_pool->count--; + } + + INIT_LIST_HEAD(&pbl_pool->list); + pbl_pool->count = 0; + + spin_unlock(&pbl_pool->lock); + + /* Deal with private free xri pool */ + pvt_pool = &multixri_pool->pvt_pool; + spin_lock(&pvt_pool->lock); + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "1237 Moving %d buffers from pvt_pool[%d] TO PUT list\n", + pvt_pool->count, i); + + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &pvt_pool->list, list) { + list_move_tail(&lpfc_ncmd->list, + &qp->lpfc_io_buf_list_put); + qp->put_io_bufs++; + pvt_pool->count--; + } + + INIT_LIST_HEAD(&pvt_pool->list); + pvt_pool->count = 0; + + spin_unlock(&pvt_pool->lock); + spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag); + + kfree(multixri_pool); + } +} + +/** * lpfc_online - Initialize and bring a HBA online * @phba: pointer to lpfc hba data structure. * @@ -3152,6 +3386,9 @@ lpfc_online(struct lpfc_hba *phba) } lpfc_destroy_vport_work_array(phba, vports); + if (phba->cfg_xri_rebalancing) + lpfc_create_multixri_pools(phba); + lpfc_unblock_mgmt_io(phba); return 0; } @@ -3310,6 +3547,9 @@ lpfc_offline(struct lpfc_hba *phba) spin_unlock_irq(shost->host_lock); } lpfc_destroy_vport_work_array(phba, vports); + + if (phba->cfg_xri_rebalancing) + lpfc_destroy_multixri_pools(phba); } /** @@ -3323,7 +3563,7 @@ lpfc_offline(struct lpfc_hba *phba) static void lpfc_scsi_free(struct lpfc_hba *phba) { - struct lpfc_scsi_buf *sb, *sb_next; + struct lpfc_io_buf *sb, *sb_next; if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) return; @@ -3355,50 +3595,57 @@ lpfc_scsi_free(struct lpfc_hba *phba) spin_unlock(&phba->scsi_buf_list_get_lock); spin_unlock_irq(&phba->hbalock); } + /** - * lpfc_nvme_free - Free all the NVME buffers and IOCBs from driver lists + * lpfc_io_free - Free all the IO buffers and IOCBs from driver lists * @phba: pointer to lpfc hba data structure. * - * This routine is to free all the NVME buffers and IOCBs from the driver + * This routine is to free all the IO buffers and IOCBs from the driver * list back to kernel. It is called from lpfc_pci_remove_one to free * the internal resources before the device is removed from the system. **/ -static void -lpfc_nvme_free(struct lpfc_hba *phba) +void +lpfc_io_free(struct lpfc_hba *phba) { - struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next; - - if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) - return; + struct lpfc_io_buf *lpfc_ncmd, *lpfc_ncmd_next; + struct lpfc_sli4_hdw_queue *qp; + int idx; spin_lock_irq(&phba->hbalock); - /* Release all the lpfc_nvme_bufs maintained by this host. */ - spin_lock(&phba->nvme_buf_list_put_lock); - list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, - &phba->lpfc_nvme_buf_list_put, list) { - list_del(&lpfc_ncmd->list); - phba->put_nvme_bufs--; - dma_pool_free(phba->lpfc_sg_dma_buf_pool, lpfc_ncmd->data, - lpfc_ncmd->dma_handle); - kfree(lpfc_ncmd); - phba->total_nvme_bufs--; + for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { + qp = &phba->sli4_hba.hdwq[idx]; + /* Release all the lpfc_nvme_bufs maintained by this host. */ + spin_lock(&qp->io_buf_list_put_lock); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &qp->lpfc_io_buf_list_put, + list) { + list_del(&lpfc_ncmd->list); + qp->put_io_bufs--; + dma_pool_free(phba->lpfc_sg_dma_buf_pool, + lpfc_ncmd->data, lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + qp->total_io_bufs--; + } + spin_unlock(&qp->io_buf_list_put_lock); + + spin_lock(&qp->io_buf_list_get_lock); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &qp->lpfc_io_buf_list_get, + list) { + list_del(&lpfc_ncmd->list); + qp->get_io_bufs--; + dma_pool_free(phba->lpfc_sg_dma_buf_pool, + lpfc_ncmd->data, lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + qp->total_io_bufs--; + } + spin_unlock(&qp->io_buf_list_get_lock); } - spin_unlock(&phba->nvme_buf_list_put_lock); - spin_lock(&phba->nvme_buf_list_get_lock); - list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, - &phba->lpfc_nvme_buf_list_get, list) { - list_del(&lpfc_ncmd->list); - phba->get_nvme_bufs--; - dma_pool_free(phba->lpfc_sg_dma_buf_pool, lpfc_ncmd->data, - lpfc_ncmd->dma_handle); - kfree(lpfc_ncmd); - phba->total_nvme_bufs--; - } - spin_unlock(&phba->nvme_buf_list_get_lock); spin_unlock_irq(&phba->hbalock); } + /** * lpfc_sli4_els_sgl_update - update ELS xri-sgl sizing and mapping * @phba: pointer to lpfc hba data structure. @@ -3640,8 +3887,102 @@ out_free_mem: return rc; } +int +lpfc_io_buf_flush(struct lpfc_hba *phba, struct list_head *cbuf) +{ + LIST_HEAD(blist); + struct lpfc_sli4_hdw_queue *qp; + struct lpfc_io_buf *lpfc_cmd; + struct lpfc_io_buf *iobufp, *prev_iobufp; + int idx, cnt, xri, inserted; + + cnt = 0; + for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { + qp = &phba->sli4_hba.hdwq[idx]; + spin_lock_irq(&qp->io_buf_list_get_lock); + spin_lock(&qp->io_buf_list_put_lock); + + /* Take everything off the get and put lists */ + list_splice_init(&qp->lpfc_io_buf_list_get, &blist); + list_splice(&qp->lpfc_io_buf_list_put, &blist); + INIT_LIST_HEAD(&qp->lpfc_io_buf_list_get); + INIT_LIST_HEAD(&qp->lpfc_io_buf_list_put); + cnt += qp->get_io_bufs + qp->put_io_bufs; + qp->get_io_bufs = 0; + qp->put_io_bufs = 0; + qp->total_io_bufs = 0; + spin_unlock(&qp->io_buf_list_put_lock); + spin_unlock_irq(&qp->io_buf_list_get_lock); + } + + /* + * Take IO buffers off blist and put on cbuf sorted by XRI. + * This is because POST_SGL takes a sequential range of XRIs + * to post to the firmware. + */ + for (idx = 0; idx < cnt; idx++) { + list_remove_head(&blist, lpfc_cmd, struct lpfc_io_buf, list); + if (!lpfc_cmd) + return cnt; + if (idx == 0) { + list_add_tail(&lpfc_cmd->list, cbuf); + continue; + } + xri = lpfc_cmd->cur_iocbq.sli4_xritag; + inserted = 0; + prev_iobufp = NULL; + list_for_each_entry(iobufp, cbuf, list) { + if (xri < iobufp->cur_iocbq.sli4_xritag) { + if (prev_iobufp) + list_add(&lpfc_cmd->list, + &prev_iobufp->list); + else + list_add(&lpfc_cmd->list, cbuf); + inserted = 1; + break; + } + prev_iobufp = iobufp; + } + if (!inserted) + list_add_tail(&lpfc_cmd->list, cbuf); + } + return cnt; +} + +int +lpfc_io_buf_replenish(struct lpfc_hba *phba, struct list_head *cbuf) +{ + struct lpfc_sli4_hdw_queue *qp; + struct lpfc_io_buf *lpfc_cmd; + int idx, cnt; + + qp = phba->sli4_hba.hdwq; + cnt = 0; + while (!list_empty(cbuf)) { + for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { + list_remove_head(cbuf, lpfc_cmd, + struct lpfc_io_buf, list); + if (!lpfc_cmd) + return cnt; + cnt++; + qp = &phba->sli4_hba.hdwq[idx]; + lpfc_cmd->hdwq_no = idx; + lpfc_cmd->hdwq = qp; + lpfc_cmd->cur_iocbq.wqe_cmpl = NULL; + lpfc_cmd->cur_iocbq.iocb_cmpl = NULL; + spin_lock(&qp->io_buf_list_put_lock); + list_add_tail(&lpfc_cmd->list, + &qp->lpfc_io_buf_list_put); + qp->put_io_bufs++; + qp->total_io_bufs++; + spin_unlock(&qp->io_buf_list_put_lock); + } + } + return cnt; +} + /** - * lpfc_sli4_scsi_sgl_update - update xri-sgl sizing and mapping + * lpfc_sli4_io_sgl_update - update xri-sgl sizing and mapping * @phba: pointer to lpfc hba data structure. * * This routine first calculates the sizes of the current els and allocated @@ -3653,94 +3994,192 @@ out_free_mem: * 0 - successful (for now, it always returns 0) **/ int -lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba) +lpfc_sli4_io_sgl_update(struct lpfc_hba *phba) { - struct lpfc_scsi_buf *psb, *psb_next; - uint16_t i, lxri, els_xri_cnt, scsi_xri_cnt; - LIST_HEAD(scsi_sgl_list); - int rc; - - /* - * update on pci function's els xri-sgl list - */ - els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); - phba->total_scsi_bufs = 0; + struct lpfc_io_buf *lpfc_ncmd = NULL, *lpfc_ncmd_next = NULL; + uint16_t i, lxri, els_xri_cnt; + uint16_t io_xri_cnt, io_xri_max; + LIST_HEAD(io_sgl_list); + int rc, cnt; /* - * update on pci function's allocated scsi xri-sgl list + * update on pci function's allocated nvme xri-sgl list */ - /* maximum number of xris available for scsi buffers */ - phba->sli4_hba.scsi_xri_max = phba->sli4_hba.max_cfg_param.max_xri - - els_xri_cnt; - if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) - return 0; + /* maximum number of xris available for nvme buffers */ + els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); + io_xri_max = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; + phba->sli4_hba.io_xri_max = io_xri_max; - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) - phba->sli4_hba.scsi_xri_max = /* Split them up */ - (phba->sli4_hba.scsi_xri_max * - phba->cfg_xri_split) / 100; + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6074 Current allocated XRI sgl count:%d, " + "maximum XRI count:%d\n", + phba->sli4_hba.io_xri_cnt, + phba->sli4_hba.io_xri_max); - spin_lock_irq(&phba->scsi_buf_list_get_lock); - spin_lock(&phba->scsi_buf_list_put_lock); - list_splice_init(&phba->lpfc_scsi_buf_list_get, &scsi_sgl_list); - list_splice(&phba->lpfc_scsi_buf_list_put, &scsi_sgl_list); - spin_unlock(&phba->scsi_buf_list_put_lock); - spin_unlock_irq(&phba->scsi_buf_list_get_lock); + cnt = lpfc_io_buf_flush(phba, &io_sgl_list); - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "6060 Current allocated SCSI xri-sgl count:%d, " - "maximum SCSI xri count:%d (split:%d)\n", - phba->sli4_hba.scsi_xri_cnt, - phba->sli4_hba.scsi_xri_max, phba->cfg_xri_split); - - if (phba->sli4_hba.scsi_xri_cnt > phba->sli4_hba.scsi_xri_max) { - /* max scsi xri shrinked below the allocated scsi buffers */ - scsi_xri_cnt = phba->sli4_hba.scsi_xri_cnt - - phba->sli4_hba.scsi_xri_max; - /* release the extra allocated scsi buffers */ - for (i = 0; i < scsi_xri_cnt; i++) { - list_remove_head(&scsi_sgl_list, psb, - struct lpfc_scsi_buf, list); - if (psb) { + if (phba->sli4_hba.io_xri_cnt > phba->sli4_hba.io_xri_max) { + /* max nvme xri shrunk below the allocated nvme buffers */ + io_xri_cnt = phba->sli4_hba.io_xri_cnt - + phba->sli4_hba.io_xri_max; + /* release the extra allocated nvme buffers */ + for (i = 0; i < io_xri_cnt; i++) { + list_remove_head(&io_sgl_list, lpfc_ncmd, + struct lpfc_io_buf, list); + if (lpfc_ncmd) { dma_pool_free(phba->lpfc_sg_dma_buf_pool, - psb->data, psb->dma_handle); - kfree(psb); + lpfc_ncmd->data, + lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); } } - spin_lock_irq(&phba->scsi_buf_list_get_lock); - phba->sli4_hba.scsi_xri_cnt -= scsi_xri_cnt; - spin_unlock_irq(&phba->scsi_buf_list_get_lock); + phba->sli4_hba.io_xri_cnt -= io_xri_cnt; } - /* update xris associated to remaining allocated scsi buffers */ - psb = NULL; - psb_next = NULL; - list_for_each_entry_safe(psb, psb_next, &scsi_sgl_list, list) { + /* update xris associated to remaining allocated nvme buffers */ + lpfc_ncmd = NULL; + lpfc_ncmd_next = NULL; + phba->sli4_hba.io_xri_cnt = cnt; + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &io_sgl_list, list) { lxri = lpfc_sli4_next_xritag(phba); if (lxri == NO_XRI) { lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "2560 Failed to allocate xri for " - "scsi buffer\n"); + "6075 Failed to allocate xri for " + "nvme buffer\n"); rc = -ENOMEM; goto out_free_mem; } - psb->cur_iocbq.sli4_lxritag = lxri; - psb->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri]; + lpfc_ncmd->cur_iocbq.sli4_lxritag = lxri; + lpfc_ncmd->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri]; } - spin_lock_irq(&phba->scsi_buf_list_get_lock); - spin_lock(&phba->scsi_buf_list_put_lock); - list_splice_init(&scsi_sgl_list, &phba->lpfc_scsi_buf_list_get); - INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put); - spin_unlock(&phba->scsi_buf_list_put_lock); - spin_unlock_irq(&phba->scsi_buf_list_get_lock); + cnt = lpfc_io_buf_replenish(phba, &io_sgl_list); return 0; out_free_mem: - lpfc_scsi_free(phba); + lpfc_io_free(phba); return rc; } +/** + * lpfc_new_io_buf - IO buffer allocator for HBA with SLI4 IF spec + * @vport: The virtual port for which this call being executed. + * @num_to_allocate: The requested number of buffers to allocate. + * + * This routine allocates nvme buffers for device with SLI-4 interface spec, + * the nvme buffer contains all the necessary information needed to initiate + * an I/O. After allocating up to @num_to_allocate IO buffers and put + * them on a list, it post them to the port by using SGL block post. + * + * Return codes: + * int - number of IO buffers that were allocated and posted. + * 0 = failure, less than num_to_alloc is a partial failure. + **/ +int +lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc) +{ + struct lpfc_io_buf *lpfc_ncmd; + struct lpfc_iocbq *pwqeq; + uint16_t iotag, lxri = 0; + int bcnt, num_posted; + LIST_HEAD(prep_nblist); + LIST_HEAD(post_nblist); + LIST_HEAD(nvme_nblist); + + /* Sanity check to ensure our sizing is right for both SCSI and NVME */ + if (sizeof(struct lpfc_io_buf) > LPFC_COMMON_IO_BUF_SZ) { + lpfc_printf_log(phba, KERN_ERR, LOG_FCP, + "6426 Common buffer size %ld exceeds %d\n", + sizeof(struct lpfc_io_buf), + LPFC_COMMON_IO_BUF_SZ); + return 0; + } + + phba->sli4_hba.io_xri_cnt = 0; + for (bcnt = 0; bcnt < num_to_alloc; bcnt++) { + lpfc_ncmd = kzalloc(LPFC_COMMON_IO_BUF_SZ, GFP_KERNEL); + if (!lpfc_ncmd) |