summaryrefslogtreecommitdiffstats
path: root/drivers/scsi/lpfc/lpfc_init.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-09 16:53:47 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-09 16:53:47 -0800
commit92fff53b7191cae566be9ca6752069426c7f8241 (patch)
tree019396be4719ad3969d0395cfa0a90860be75f4a /drivers/scsi/lpfc/lpfc_init.c
parenta50243b1ddcdd766d0d17fbfeeb1a22e62fdc461 (diff)
parent26af1a368e40618d67956b1f883fbcfec292c5d8 (diff)
Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
Pull SCSI updates from James Bottomley: "This is mostly update of the usual drivers: arcmsr, qla2xxx, lpfc, hisi_sas, target/iscsi and target/core. Additionally Christoph refactored gdth as part of the dma changes. The major mid-layer change this time is the removal of bidi commands and with them the whole of the osd/exofs driver and filesystem. This is a major simplification for block and mq in particular" * tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi: (240 commits) scsi: cxgb4i: validate tcp sequence number only if chip version <= T5 scsi: cxgb4i: get pf number from lldi->pf scsi: core: replace GFP_ATOMIC with GFP_KERNEL in scsi_scan.c scsi: mpt3sas: Add missing breaks in switch statements scsi: aacraid: Fix missing break in switch statement scsi: kill command serial number scsi: csiostor: drop serial_number usage scsi: mvumi: use request tag instead of serial_number scsi: dpt_i2o: remove serial number usage scsi: st: osst: Remove negative constant left-shifts scsi: ufs-bsg: Allow reading descriptors scsi: ufs: Allow reading descriptor via raw upiu scsi: ufs-bsg: Change the calling convention for write descriptor scsi: ufs: Remove unused device quirks Revert "scsi: ufs: disable vccq if it's not needed by UFS device" scsi: megaraid_sas: Remove a bunch of set but not used variables scsi: clean obsolete return values of eh_timed_out scsi: sd: Optimal I/O size should be a multiple of physical block size scsi: MAINTAINERS: SCSI initiator and target tweaks scsi: fcoe: make use of fip_mode enum complete ...
Diffstat (limited to 'drivers/scsi/lpfc/lpfc_init.c')
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c2274
1 files changed, 1271 insertions, 1003 deletions
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index e1129260ed18..3b5873f6751e 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1,7 +1,7 @@
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
* “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2004-2016 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
@@ -37,6 +37,7 @@
#include <linux/miscdevice.h>
#include <linux/percpu.h>
#include <linux/msi.h>
+#include <linux/irq.h>
#include <linux/bitops.h>
#include <scsi/scsi.h>
@@ -71,7 +72,6 @@ unsigned long _dump_buf_dif_order;
spinlock_t _dump_buf_lock;
/* Used when mapping IRQ vectors in a driver centric manner */
-uint16_t *lpfc_used_cpu;
uint32_t lpfc_present_cpu;
static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *);
@@ -93,6 +93,8 @@ static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *);
static void lpfc_sli4_disable_intr(struct lpfc_hba *);
static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t);
static void lpfc_sli4_oas_verify(struct lpfc_hba *phba);
+static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t);
+static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int);
static struct scsi_transport_template *lpfc_transport_template = NULL;
static struct scsi_transport_template *lpfc_vport_transport_template = NULL;
@@ -1037,14 +1039,14 @@ lpfc_hba_down_post_s3(struct lpfc_hba *phba)
static int
lpfc_hba_down_post_s4(struct lpfc_hba *phba)
{
- struct lpfc_scsi_buf *psb, *psb_next;
+ struct lpfc_io_buf *psb, *psb_next;
struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next;
+ struct lpfc_sli4_hdw_queue *qp;
LIST_HEAD(aborts);
LIST_HEAD(nvme_aborts);
LIST_HEAD(nvmet_aborts);
- unsigned long iflag = 0;
struct lpfc_sglq *sglq_entry = NULL;
- int cnt;
+ int cnt, idx;
lpfc_sli_hbqbuf_free_all(phba);
@@ -1071,55 +1073,65 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
spin_unlock(&phba->sli4_hba.sgl_list_lock);
- /* abts_scsi_buf_list_lock required because worker thread uses this
+
+ /* abts_xxxx_buf_list_lock required because worker thread uses this
* list.
*/
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
- spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock);
- list_splice_init(&phba->sli4_hba.lpfc_abts_scsi_buf_list,
- &aborts);
- spin_unlock(&phba->sli4_hba.abts_scsi_buf_list_lock);
- }
-
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
- list_splice_init(&phba->sli4_hba.lpfc_abts_nvme_buf_list,
- &nvme_aborts);
- list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
- &nvmet_aborts);
- spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
- }
+ cnt = 0;
+ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+ qp = &phba->sli4_hba.hdwq[idx];
- spin_unlock_irq(&phba->hbalock);
-
- list_for_each_entry_safe(psb, psb_next, &aborts, list) {
- psb->pCmd = NULL;
- psb->status = IOSTAT_SUCCESS;
- }
- spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag);
- list_splice(&aborts, &phba->lpfc_scsi_buf_list_put);
- spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag);
+ spin_lock(&qp->abts_scsi_buf_list_lock);
+ list_splice_init(&qp->lpfc_abts_scsi_buf_list,
+ &aborts);
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- cnt = 0;
- list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) {
+ list_for_each_entry_safe(psb, psb_next, &aborts, list) {
psb->pCmd = NULL;
psb->status = IOSTAT_SUCCESS;
cnt++;
}
- spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag);
- phba->put_nvme_bufs += cnt;
- list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put);
- spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag);
+ spin_lock(&qp->io_buf_list_put_lock);
+ list_splice_init(&aborts, &qp->lpfc_io_buf_list_put);
+ qp->put_io_bufs += qp->abts_scsi_io_bufs;
+ qp->abts_scsi_io_bufs = 0;
+ spin_unlock(&qp->io_buf_list_put_lock);
+ spin_unlock(&qp->abts_scsi_buf_list_lock);
+
+ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+ spin_lock(&qp->abts_nvme_buf_list_lock);
+ list_splice_init(&qp->lpfc_abts_nvme_buf_list,
+ &nvme_aborts);
+ list_for_each_entry_safe(psb, psb_next, &nvme_aborts,
+ list) {
+ psb->pCmd = NULL;
+ psb->status = IOSTAT_SUCCESS;
+ cnt++;
+ }
+ spin_lock(&qp->io_buf_list_put_lock);
+ qp->put_io_bufs += qp->abts_nvme_io_bufs;
+ qp->abts_nvme_io_bufs = 0;
+ list_splice_init(&nvme_aborts,
+ &qp->lpfc_io_buf_list_put);
+ spin_unlock(&qp->io_buf_list_put_lock);
+ spin_unlock(&qp->abts_nvme_buf_list_lock);
+
+ }
+ }
+ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+ spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
+ list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
+ &nvmet_aborts);
+ spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) {
ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP);
lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
}
}
+ spin_unlock_irq(&phba->hbalock);
lpfc_sli4_free_sp_events(phba);
- return 0;
+ return cnt;
}
/**
@@ -1239,6 +1251,96 @@ lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
return;
}
+static void
+lpfc_hb_eq_delay_work(struct work_struct *work)
+{
+ struct lpfc_hba *phba = container_of(to_delayed_work(work),
+ struct lpfc_hba, eq_delay_work);
+ struct lpfc_eq_intr_info *eqi, *eqi_new;
+ struct lpfc_queue *eq, *eq_next;
+ unsigned char *eqcnt = NULL;
+ uint32_t usdelay;
+ int i;
+
+ if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING)
+ return;
+
+ if (phba->link_state == LPFC_HBA_ERROR ||
+ phba->pport->fc_flag & FC_OFFLINE_MODE)
+ goto requeue;
+
+ eqcnt = kcalloc(num_possible_cpus(), sizeof(unsigned char),
+ GFP_KERNEL);
+ if (!eqcnt)
+ goto requeue;
+
+ for (i = 0; i < phba->cfg_irq_chann; i++) {
+ eq = phba->sli4_hba.hdwq[i].hba_eq;
+ if (eq && eqcnt[eq->last_cpu] < 2)
+ eqcnt[eq->last_cpu]++;
+ continue;
+ }
+
+ for_each_present_cpu(i) {
+ if (phba->cfg_irq_chann > 1 && eqcnt[i] < 2)
+ continue;
+
+ eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i);
+
+ usdelay = (eqi->icnt / LPFC_IMAX_THRESHOLD) *
+ LPFC_EQ_DELAY_STEP;
+ if (usdelay > LPFC_MAX_AUTO_EQ_DELAY)
+ usdelay = LPFC_MAX_AUTO_EQ_DELAY;
+
+ eqi->icnt = 0;
+
+ list_for_each_entry_safe(eq, eq_next, &eqi->list, cpu_list) {
+ if (eq->last_cpu != i) {
+ eqi_new = per_cpu_ptr(phba->sli4_hba.eq_info,
+ eq->last_cpu);
+ list_move_tail(&eq->cpu_list, &eqi_new->list);
+ continue;
+ }
+ if (usdelay != eq->q_mode)
+ lpfc_modify_hba_eq_delay(phba, eq->hdwq, 1,
+ usdelay);
+ }
+ }
+
+ kfree(eqcnt);
+
+requeue:
+ queue_delayed_work(phba->wq, &phba->eq_delay_work,
+ msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
+}
+
+/**
+ * lpfc_hb_mxp_handler - Multi-XRI pools handler to adjust XRI distribution
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * For each heartbeat, this routine does some heuristic methods to adjust
+ * XRI distribution. The goal is to fully utilize free XRIs.
+ **/
+static void lpfc_hb_mxp_handler(struct lpfc_hba *phba)
+{
+ u32 i;
+ u32 hwq_count;
+
+ hwq_count = phba->cfg_hdw_queue;
+ for (i = 0; i < hwq_count; i++) {
+ /* Adjust XRIs in private pool */
+ lpfc_adjust_pvt_pool_count(phba, i);
+
+ /* Adjust high watermark */
+ lpfc_adjust_high_watermark(phba, i);
+
+#ifdef LPFC_MXP_STAT
+ /* Snapshot pbl, pvt and busy count */
+ lpfc_snapshot_mxp(phba, i);
+#endif
+ }
+}
+
/**
* lpfc_hb_timeout_handler - The HBA-timer timeout handler
* @phba: pointer to lpfc hba data structure.
@@ -1264,16 +1366,11 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
int retval, i;
struct lpfc_sli *psli = &phba->sli;
LIST_HEAD(completions);
- struct lpfc_queue *qp;
- unsigned long time_elapsed;
- uint32_t tick_cqe, max_cqe, val;
- uint64_t tot, data1, data2, data3;
- struct lpfc_nvmet_tgtport *tgtp;
- struct lpfc_register reg_data;
- struct nvme_fc_local_port *localport;
- struct lpfc_nvme_lport *lport;
- struct lpfc_nvme_ctrl_stat *cstat;
- void __iomem *eqdreg = phba->sli4_hba.u.if_type2.EQDregaddr;
+
+ if (phba->cfg_xri_rebalancing) {
+ /* Multi-XRI pools handler */
+ lpfc_hb_mxp_handler(phba);
+ }
vports = lpfc_create_vport_work_array(phba);
if (vports != NULL)
@@ -1288,107 +1385,6 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
(phba->pport->fc_flag & FC_OFFLINE_MODE))
return;
- if (phba->cfg_auto_imax) {
- if (!phba->last_eqdelay_time) {
- phba->last_eqdelay_time = jiffies;
- goto skip_eqdelay;
- }
- time_elapsed = jiffies - phba->last_eqdelay_time;
- phba->last_eqdelay_time = jiffies;
-
- tot = 0xffff;
- /* Check outstanding IO count */
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- if (phba->nvmet_support) {
- tgtp = phba->targetport->private;
- /* Calculate outstanding IOs */
- tot = atomic_read(&tgtp->rcv_fcp_cmd_drop);
- tot += atomic_read(&tgtp->xmt_fcp_release);
- tot = atomic_read(&tgtp->rcv_fcp_cmd_in) - tot;
- } else {
- localport = phba->pport->localport;
- if (!localport || !localport->private)
- goto skip_eqdelay;
- lport = (struct lpfc_nvme_lport *)
- localport->private;
- tot = 0;
- for (i = 0;
- i < phba->cfg_nvme_io_channel; i++) {
- cstat = &lport->cstat[i];
- data1 = atomic_read(
- &cstat->fc4NvmeInputRequests);
- data2 = atomic_read(
- &cstat->fc4NvmeOutputRequests);
- data3 = atomic_read(
- &cstat->fc4NvmeControlRequests);
- tot += (data1 + data2 + data3);
- tot -= atomic_read(
- &cstat->fc4NvmeIoCmpls);
- }
- }
- }
-
- /* Interrupts per sec per EQ */
- val = phba->cfg_fcp_imax / phba->io_channel_irqs;
- tick_cqe = val / CONFIG_HZ; /* Per tick per EQ */
-
- /* Assume 1 CQE/ISR, calc max CQEs allowed for time duration */
- max_cqe = time_elapsed * tick_cqe;
-
- for (i = 0; i < phba->io_channel_irqs; i++) {
- /* Fast-path EQ */
- qp = phba->sli4_hba.hba_eq[i];
- if (!qp)
- continue;
-
- /* Use no EQ delay if we don't have many outstanding
- * IOs, or if we are only processing 1 CQE/ISR or less.
- * Otherwise, assume we can process up to lpfc_fcp_imax
- * interrupts per HBA.
- */
- if (tot < LPFC_NODELAY_MAX_IO ||
- qp->EQ_cqe_cnt <= max_cqe)
- val = 0;
- else
- val = phba->cfg_fcp_imax;
-
- if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) {
- /* Use EQ Delay Register method */
-
- /* Convert for EQ Delay register */
- if (val) {
- /* First, interrupts per sec per EQ */
- val = phba->cfg_fcp_imax /
- phba->io_channel_irqs;
-
- /* us delay between each interrupt */
- val = LPFC_SEC_TO_USEC / val;
- }
- if (val != qp->q_mode) {
- reg_data.word0 = 0;
- bf_set(lpfc_sliport_eqdelay_id,
- &reg_data, qp->queue_id);
- bf_set(lpfc_sliport_eqdelay_delay,
- &reg_data, val);
- writel(reg_data.word0, eqdreg);
- }
- } else {
- /* Use mbox command method */
- if (val != qp->q_mode)
- lpfc_modify_hba_eq_delay(phba, i,
- 1, val);
- }
-
- /*
- * val is cfg_fcp_imax or 0 for mbox delay or us delay
- * between interrupts for EQDR.
- */
- qp->q_mode = val;
- qp->EQ_cqe_cnt = 0;
- }
- }
-
-skip_eqdelay:
spin_lock_irq(&phba->pport->work_port_lock);
if (time_after(phba->last_completion_time +
@@ -2943,7 +2939,9 @@ lpfc_sli4_stop_fcf_redisc_wait_timer(struct lpfc_hba *phba)
void
lpfc_stop_hba_timers(struct lpfc_hba *phba)
{
- lpfc_stop_vport_timers(phba->pport);
+ if (phba->pport)
+ lpfc_stop_vport_timers(phba->pport);
+ cancel_delayed_work_sync(&phba->eq_delay_work);
del_timer_sync(&phba->sli.mbox_tmo);
del_timer_sync(&phba->fabric_block_timer);
del_timer_sync(&phba->eratt_poll);
@@ -3071,6 +3069,242 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba)
}
/**
+ * lpfc_create_expedite_pool - create expedite pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine moves a batch of XRIs from lpfc_io_buf_list_put of HWQ 0
+ * to expedite pool. Mark them as expedite.
+ **/
+void lpfc_create_expedite_pool(struct lpfc_hba *phba)
+{
+ struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_io_buf *lpfc_ncmd;
+ struct lpfc_io_buf *lpfc_ncmd_next;
+ struct lpfc_epd_pool *epd_pool;
+ unsigned long iflag;
+
+ epd_pool = &phba->epd_pool;
+ qp = &phba->sli4_hba.hdwq[0];
+
+ spin_lock_init(&epd_pool->lock);
+ spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
+ spin_lock(&epd_pool->lock);
+ INIT_LIST_HEAD(&epd_pool->list);
+ list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+ &qp->lpfc_io_buf_list_put, list) {
+ list_move_tail(&lpfc_ncmd->list, &epd_pool->list);
+ lpfc_ncmd->expedite = true;
+ qp->put_io_bufs--;
+ epd_pool->count++;
+ if (epd_pool->count >= XRI_BATCH)
+ break;
+ }
+ spin_unlock(&epd_pool->lock);
+ spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag);
+}
+
+/**
+ * lpfc_destroy_expedite_pool - destroy expedite pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine returns XRIs from expedite pool to lpfc_io_buf_list_put
+ * of HWQ 0. Clear the mark.
+ **/
+void lpfc_destroy_expedite_pool(struct lpfc_hba *phba)
+{
+ struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_io_buf *lpfc_ncmd;
+ struct lpfc_io_buf *lpfc_ncmd_next;
+ struct lpfc_epd_pool *epd_pool;
+ unsigned long iflag;
+
+ epd_pool = &phba->epd_pool;
+ qp = &phba->sli4_hba.hdwq[0];
+
+ spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
+ spin_lock(&epd_pool->lock);
+ list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+ &epd_pool->list, list) {
+ list_move_tail(&lpfc_ncmd->list,
+ &qp->lpfc_io_buf_list_put);
+ lpfc_ncmd->flags = false;
+ qp->put_io_bufs++;
+ epd_pool->count--;
+ }
+ spin_unlock(&epd_pool->lock);
+ spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag);
+}
+
+/**
+ * lpfc_create_multixri_pools - create multi-XRI pools
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine initialize public, private per HWQ. Then, move XRIs from
+ * lpfc_io_buf_list_put to public pool. High and low watermark are also
+ * Initialized.
+ **/
+void lpfc_create_multixri_pools(struct lpfc_hba *phba)
+{
+ u32 i, j;
+ u32 hwq_count;
+ u32 count_per_hwq;
+ struct lpfc_io_buf *lpfc_ncmd;
+ struct lpfc_io_buf *lpfc_ncmd_next;
+ unsigned long iflag;
+ struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_multixri_pool *multixri_pool;
+ struct lpfc_pbl_pool *pbl_pool;
+ struct lpfc_pvt_pool *pvt_pool;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "1234 num_hdw_queue=%d num_present_cpu=%d common_xri_cnt=%d\n",
+ phba->cfg_hdw_queue, phba->sli4_hba.num_present_cpu,
+ phba->sli4_hba.io_xri_cnt);
+
+ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
+ lpfc_create_expedite_pool(phba);
+
+ hwq_count = phba->cfg_hdw_queue;
+ count_per_hwq = phba->sli4_hba.io_xri_cnt / hwq_count;
+
+ for (i = 0; i < hwq_count; i++) {
+ multixri_pool = kzalloc(sizeof(*multixri_pool), GFP_KERNEL);
+
+ if (!multixri_pool) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "1238 Failed to allocate memory for "
+ "multixri_pool\n");
+
+ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
+ lpfc_destroy_expedite_pool(phba);
+
+ j = 0;
+ while (j < i) {
+ qp = &phba->sli4_hba.hdwq[j];
+ kfree(qp->p_multixri_pool);
+ j++;
+ }
+ phba->cfg_xri_rebalancing = 0;
+ return;
+ }
+
+ qp = &phba->sli4_hba.hdwq[i];
+ qp->p_multixri_pool = multixri_pool;
+
+ multixri_pool->xri_limit = count_per_hwq;
+ multixri_pool->rrb_next_hwqid = i;
+
+ /* Deal with public free xri pool */
+ pbl_pool = &multixri_pool->pbl_pool;
+ spin_lock_init(&pbl_pool->lock);
+ spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
+ spin_lock(&pbl_pool->lock);
+ INIT_LIST_HEAD(&pbl_pool->list);
+ list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+ &qp->lpfc_io_buf_list_put, list) {
+ list_move_tail(&lpfc_ncmd->list, &pbl_pool->list);
+ qp->put_io_bufs--;
+ pbl_pool->count++;
+ }
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "1235 Moved %d buffers from PUT list over to pbl_pool[%d]\n",
+ pbl_pool->count, i);
+ spin_unlock(&pbl_pool->lock);
+ spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag);
+
+ /* Deal with private free xri pool */
+ pvt_pool = &multixri_pool->pvt_pool;
+ pvt_pool->high_watermark = multixri_pool->xri_limit / 2;
+ pvt_pool->low_watermark = XRI_BATCH;
+ spin_lock_init(&pvt_pool->lock);
+ spin_lock_irqsave(&pvt_pool->lock, iflag);
+ INIT_LIST_HEAD(&pvt_pool->list);
+ pvt_pool->count = 0;
+ spin_unlock_irqrestore(&pvt_pool->lock, iflag);
+ }
+}
+
+/**
+ * lpfc_destroy_multixri_pools - destroy multi-XRI pools
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine returns XRIs from public/private to lpfc_io_buf_list_put.
+ **/
+void lpfc_destroy_multixri_pools(struct lpfc_hba *phba)
+{
+ u32 i;
+ u32 hwq_count;
+ struct lpfc_io_buf *lpfc_ncmd;
+ struct lpfc_io_buf *lpfc_ncmd_next;
+ unsigned long iflag;
+ struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_multixri_pool *multixri_pool;
+ struct lpfc_pbl_pool *pbl_pool;
+ struct lpfc_pvt_pool *pvt_pool;
+
+ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
+ lpfc_destroy_expedite_pool(phba);
+
+ hwq_count = phba->cfg_hdw_queue;
+
+ for (i = 0; i < hwq_count; i++) {
+ qp = &phba->sli4_hba.hdwq[i];
+ multixri_pool = qp->p_multixri_pool;
+ if (!multixri_pool)
+ continue;
+
+ qp->p_multixri_pool = NULL;
+
+ spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
+
+ /* Deal with public free xri pool */
+ pbl_pool = &multixri_pool->pbl_pool;
+ spin_lock(&pbl_pool->lock);
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "1236 Moving %d buffers from pbl_pool[%d] TO PUT list\n",
+ pbl_pool->count, i);
+
+ list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+ &pbl_pool->list, list) {
+ list_move_tail(&lpfc_ncmd->list,
+ &qp->lpfc_io_buf_list_put);
+ qp->put_io_bufs++;
+ pbl_pool->count--;
+ }
+
+ INIT_LIST_HEAD(&pbl_pool->list);
+ pbl_pool->count = 0;
+
+ spin_unlock(&pbl_pool->lock);
+
+ /* Deal with private free xri pool */
+ pvt_pool = &multixri_pool->pvt_pool;
+ spin_lock(&pvt_pool->lock);
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "1237 Moving %d buffers from pvt_pool[%d] TO PUT list\n",
+ pvt_pool->count, i);
+
+ list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+ &pvt_pool->list, list) {
+ list_move_tail(&lpfc_ncmd->list,
+ &qp->lpfc_io_buf_list_put);
+ qp->put_io_bufs++;
+ pvt_pool->count--;
+ }
+
+ INIT_LIST_HEAD(&pvt_pool->list);
+ pvt_pool->count = 0;
+
+ spin_unlock(&pvt_pool->lock);
+ spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag);
+
+ kfree(multixri_pool);
+ }
+}
+
+/**
* lpfc_online - Initialize and bring a HBA online
* @phba: pointer to lpfc hba data structure.
*
@@ -3152,6 +3386,9 @@ lpfc_online(struct lpfc_hba *phba)
}
lpfc_destroy_vport_work_array(phba, vports);
+ if (phba->cfg_xri_rebalancing)
+ lpfc_create_multixri_pools(phba);
+
lpfc_unblock_mgmt_io(phba);
return 0;
}
@@ -3310,6 +3547,9 @@ lpfc_offline(struct lpfc_hba *phba)
spin_unlock_irq(shost->host_lock);
}
lpfc_destroy_vport_work_array(phba, vports);
+
+ if (phba->cfg_xri_rebalancing)
+ lpfc_destroy_multixri_pools(phba);
}
/**
@@ -3323,7 +3563,7 @@ lpfc_offline(struct lpfc_hba *phba)
static void
lpfc_scsi_free(struct lpfc_hba *phba)
{
- struct lpfc_scsi_buf *sb, *sb_next;
+ struct lpfc_io_buf *sb, *sb_next;
if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
return;
@@ -3355,50 +3595,57 @@ lpfc_scsi_free(struct lpfc_hba *phba)
spin_unlock(&phba->scsi_buf_list_get_lock);
spin_unlock_irq(&phba->hbalock);
}
+
/**
- * lpfc_nvme_free - Free all the NVME buffers and IOCBs from driver lists
+ * lpfc_io_free - Free all the IO buffers and IOCBs from driver lists
* @phba: pointer to lpfc hba data structure.
*
- * This routine is to free all the NVME buffers and IOCBs from the driver
+ * This routine is to free all the IO buffers and IOCBs from the driver
* list back to kernel. It is called from lpfc_pci_remove_one to free
* the internal resources before the device is removed from the system.
**/
-static void
-lpfc_nvme_free(struct lpfc_hba *phba)
+void
+lpfc_io_free(struct lpfc_hba *phba)
{
- struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next;
-
- if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
- return;
+ struct lpfc_io_buf *lpfc_ncmd, *lpfc_ncmd_next;
+ struct lpfc_sli4_hdw_queue *qp;
+ int idx;
spin_lock_irq(&phba->hbalock);
- /* Release all the lpfc_nvme_bufs maintained by this host. */
- spin_lock(&phba->nvme_buf_list_put_lock);
- list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
- &phba->lpfc_nvme_buf_list_put, list) {
- list_del(&lpfc_ncmd->list);
- phba->put_nvme_bufs--;
- dma_pool_free(phba->lpfc_sg_dma_buf_pool, lpfc_ncmd->data,
- lpfc_ncmd->dma_handle);
- kfree(lpfc_ncmd);
- phba->total_nvme_bufs--;
+ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+ qp = &phba->sli4_hba.hdwq[idx];
+ /* Release all the lpfc_nvme_bufs maintained by this host. */
+ spin_lock(&qp->io_buf_list_put_lock);
+ list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+ &qp->lpfc_io_buf_list_put,
+ list) {
+ list_del(&lpfc_ncmd->list);
+ qp->put_io_bufs--;
+ dma_pool_free(phba->lpfc_sg_dma_buf_pool,
+ lpfc_ncmd->data, lpfc_ncmd->dma_handle);
+ kfree(lpfc_ncmd);
+ qp->total_io_bufs--;
+ }
+ spin_unlock(&qp->io_buf_list_put_lock);
+
+ spin_lock(&qp->io_buf_list_get_lock);
+ list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+ &qp->lpfc_io_buf_list_get,
+ list) {
+ list_del(&lpfc_ncmd->list);
+ qp->get_io_bufs--;
+ dma_pool_free(phba->lpfc_sg_dma_buf_pool,
+ lpfc_ncmd->data, lpfc_ncmd->dma_handle);
+ kfree(lpfc_ncmd);
+ qp->total_io_bufs--;
+ }
+ spin_unlock(&qp->io_buf_list_get_lock);
}
- spin_unlock(&phba->nvme_buf_list_put_lock);
- spin_lock(&phba->nvme_buf_list_get_lock);
- list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
- &phba->lpfc_nvme_buf_list_get, list) {
- list_del(&lpfc_ncmd->list);
- phba->get_nvme_bufs--;
- dma_pool_free(phba->lpfc_sg_dma_buf_pool, lpfc_ncmd->data,
- lpfc_ncmd->dma_handle);
- kfree(lpfc_ncmd);
- phba->total_nvme_bufs--;
- }
- spin_unlock(&phba->nvme_buf_list_get_lock);
spin_unlock_irq(&phba->hbalock);
}
+
/**
* lpfc_sli4_els_sgl_update - update ELS xri-sgl sizing and mapping
* @phba: pointer to lpfc hba data structure.
@@ -3640,8 +3887,102 @@ out_free_mem:
return rc;
}
+int
+lpfc_io_buf_flush(struct lpfc_hba *phba, struct list_head *cbuf)
+{
+ LIST_HEAD(blist);
+ struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_io_buf *lpfc_cmd;
+ struct lpfc_io_buf *iobufp, *prev_iobufp;
+ int idx, cnt, xri, inserted;
+
+ cnt = 0;
+ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+ qp = &phba->sli4_hba.hdwq[idx];
+ spin_lock_irq(&qp->io_buf_list_get_lock);
+ spin_lock(&qp->io_buf_list_put_lock);
+
+ /* Take everything off the get and put lists */
+ list_splice_init(&qp->lpfc_io_buf_list_get, &blist);
+ list_splice(&qp->lpfc_io_buf_list_put, &blist);
+ INIT_LIST_HEAD(&qp->lpfc_io_buf_list_get);
+ INIT_LIST_HEAD(&qp->lpfc_io_buf_list_put);
+ cnt += qp->get_io_bufs + qp->put_io_bufs;
+ qp->get_io_bufs = 0;
+ qp->put_io_bufs = 0;
+ qp->total_io_bufs = 0;
+ spin_unlock(&qp->io_buf_list_put_lock);
+ spin_unlock_irq(&qp->io_buf_list_get_lock);
+ }
+
+ /*
+ * Take IO buffers off blist and put on cbuf sorted by XRI.
+ * This is because POST_SGL takes a sequential range of XRIs
+ * to post to the firmware.
+ */
+ for (idx = 0; idx < cnt; idx++) {
+ list_remove_head(&blist, lpfc_cmd, struct lpfc_io_buf, list);
+ if (!lpfc_cmd)
+ return cnt;
+ if (idx == 0) {
+ list_add_tail(&lpfc_cmd->list, cbuf);
+ continue;
+ }
+ xri = lpfc_cmd->cur_iocbq.sli4_xritag;
+ inserted = 0;
+ prev_iobufp = NULL;
+ list_for_each_entry(iobufp, cbuf, list) {
+ if (xri < iobufp->cur_iocbq.sli4_xritag) {
+ if (prev_iobufp)
+ list_add(&lpfc_cmd->list,
+ &prev_iobufp->list);
+ else
+ list_add(&lpfc_cmd->list, cbuf);
+ inserted = 1;
+ break;
+ }
+ prev_iobufp = iobufp;
+ }
+ if (!inserted)
+ list_add_tail(&lpfc_cmd->list, cbuf);
+ }
+ return cnt;
+}
+
+int
+lpfc_io_buf_replenish(struct lpfc_hba *phba, struct list_head *cbuf)
+{
+ struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_io_buf *lpfc_cmd;
+ int idx, cnt;
+
+ qp = phba->sli4_hba.hdwq;
+ cnt = 0;
+ while (!list_empty(cbuf)) {
+ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+ list_remove_head(cbuf, lpfc_cmd,
+ struct lpfc_io_buf, list);
+ if (!lpfc_cmd)
+ return cnt;
+ cnt++;
+ qp = &phba->sli4_hba.hdwq[idx];
+ lpfc_cmd->hdwq_no = idx;
+ lpfc_cmd->hdwq = qp;
+ lpfc_cmd->cur_iocbq.wqe_cmpl = NULL;
+ lpfc_cmd->cur_iocbq.iocb_cmpl = NULL;
+ spin_lock(&qp->io_buf_list_put_lock);
+ list_add_tail(&lpfc_cmd->list,
+ &qp->lpfc_io_buf_list_put);
+ qp->put_io_bufs++;
+ qp->total_io_bufs++;
+ spin_unlock(&qp->io_buf_list_put_lock);
+ }
+ }
+ return cnt;
+}
+
/**
- * lpfc_sli4_scsi_sgl_update - update xri-sgl sizing and mapping
+ * lpfc_sli4_io_sgl_update - update xri-sgl sizing and mapping
* @phba: pointer to lpfc hba data structure.
*
* This routine first calculates the sizes of the current els and allocated
@@ -3653,94 +3994,192 @@ out_free_mem:
* 0 - successful (for now, it always returns 0)
**/
int
-lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba)
+lpfc_sli4_io_sgl_update(struct lpfc_hba *phba)
{
- struct lpfc_scsi_buf *psb, *psb_next;
- uint16_t i, lxri, els_xri_cnt, scsi_xri_cnt;
- LIST_HEAD(scsi_sgl_list);
- int rc;
-
- /*
- * update on pci function's els xri-sgl list
- */
- els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
- phba->total_scsi_bufs = 0;
+ struct lpfc_io_buf *lpfc_ncmd = NULL, *lpfc_ncmd_next = NULL;
+ uint16_t i, lxri, els_xri_cnt;
+ uint16_t io_xri_cnt, io_xri_max;
+ LIST_HEAD(io_sgl_list);
+ int rc, cnt;
/*
- * update on pci function's allocated scsi xri-sgl list
+ * update on pci function's allocated nvme xri-sgl list
*/
- /* maximum number of xris available for scsi buffers */
- phba->sli4_hba.scsi_xri_max = phba->sli4_hba.max_cfg_param.max_xri -
- els_xri_cnt;
- if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
- return 0;
+ /* maximum number of xris available for nvme buffers */
+ els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
+ io_xri_max = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt;
+ phba->sli4_hba.io_xri_max = io_xri_max;
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
- phba->sli4_hba.scsi_xri_max = /* Split them up */
- (phba->sli4_hba.scsi_xri_max *
- phba->cfg_xri_split) / 100;
+ lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+ "6074 Current allocated XRI sgl count:%d, "
+ "maximum XRI count:%d\n",
+ phba->sli4_hba.io_xri_cnt,
+ phba->sli4_hba.io_xri_max);
- spin_lock_irq(&phba->scsi_buf_list_get_lock);
- spin_lock(&phba->scsi_buf_list_put_lock);
- list_splice_init(&phba->lpfc_scsi_buf_list_get, &scsi_sgl_list);
- list_splice(&phba->lpfc_scsi_buf_list_put, &scsi_sgl_list);
- spin_unlock(&phba->scsi_buf_list_put_lock);
- spin_unlock_irq(&phba->scsi_buf_list_get_lock);
+ cnt = lpfc_io_buf_flush(phba, &io_sgl_list);
- lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
- "6060 Current allocated SCSI xri-sgl count:%d, "
- "maximum SCSI xri count:%d (split:%d)\n",
- phba->sli4_hba.scsi_xri_cnt,
- phba->sli4_hba.scsi_xri_max, phba->cfg_xri_split);
-
- if (phba->sli4_hba.scsi_xri_cnt > phba->sli4_hba.scsi_xri_max) {
- /* max scsi xri shrinked below the allocated scsi buffers */
- scsi_xri_cnt = phba->sli4_hba.scsi_xri_cnt -
- phba->sli4_hba.scsi_xri_max;
- /* release the extra allocated scsi buffers */
- for (i = 0; i < scsi_xri_cnt; i++) {
- list_remove_head(&scsi_sgl_list, psb,
- struct lpfc_scsi_buf, list);
- if (psb) {
+ if (phba->sli4_hba.io_xri_cnt > phba->sli4_hba.io_xri_max) {
+ /* max nvme xri shrunk below the allocated nvme buffers */
+ io_xri_cnt = phba->sli4_hba.io_xri_cnt -
+ phba->sli4_hba.io_xri_max;
+ /* release the extra allocated nvme buffers */
+ for (i = 0; i < io_xri_cnt; i++) {
+ list_remove_head(&io_sgl_list, lpfc_ncmd,
+ struct lpfc_io_buf, list);
+ if (lpfc_ncmd) {
dma_pool_free(phba->lpfc_sg_dma_buf_pool,
- psb->data, psb->dma_handle);
- kfree(psb);
+ lpfc_ncmd->data,
+ lpfc_ncmd->dma_handle);
+ kfree(lpfc_ncmd);
}
}
- spin_lock_irq(&phba->scsi_buf_list_get_lock);
- phba->sli4_hba.scsi_xri_cnt -= scsi_xri_cnt;
- spin_unlock_irq(&phba->scsi_buf_list_get_lock);
+ phba->sli4_hba.io_xri_cnt -= io_xri_cnt;
}
- /* update xris associated to remaining allocated scsi buffers */
- psb = NULL;
- psb_next = NULL;
- list_for_each_entry_safe(psb, psb_next, &scsi_sgl_list, list) {
+ /* update xris associated to remaining allocated nvme buffers */
+ lpfc_ncmd = NULL;
+ lpfc_ncmd_next = NULL;
+ phba->sli4_hba.io_xri_cnt = cnt;
+ list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+ &io_sgl_list, list) {
lxri = lpfc_sli4_next_xritag(phba);
if (lxri == NO_XRI) {
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "2560 Failed to allocate xri for "
- "scsi buffer\n");
+ "6075 Failed to allocate xri for "
+ "nvme buffer\n");
rc = -ENOMEM;
goto out_free_mem;
}
- psb->cur_iocbq.sli4_lxritag = lxri;
- psb->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri];
+ lpfc_ncmd->cur_iocbq.sli4_lxritag = lxri;
+ lpfc_ncmd->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri];
}
- spin_lock_irq(&phba->scsi_buf_list_get_lock);
- spin_lock(&phba->scsi_buf_list_put_lock);
- list_splice_init(&scsi_sgl_list, &phba->lpfc_scsi_buf_list_get);
- INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
- spin_unlock(&phba->scsi_buf_list_put_lock);
- spin_unlock_irq(&phba->scsi_buf_list_get_lock);
+ cnt = lpfc_io_buf_replenish(phba, &io_sgl_list);
return 0;
out_free_mem:
- lpfc_scsi_free(phba);
+ lpfc_io_free(phba);
return rc;
}
+/**
+ * lpfc_new_io_buf - IO buffer allocator for HBA with SLI4 IF spec
+ * @vport: The virtual port for which this call being executed.
+ * @num_to_allocate: The requested number of buffers to allocate.
+ *
+ * This routine allocates nvme buffers for device with SLI-4 interface spec,
+ * the nvme buffer contains all the necessary information needed to initiate
+ * an I/O. After allocating up to @num_to_allocate IO buffers and put
+ * them on a list, it post them to the port by using SGL block post.
+ *
+ * Return codes:
+ * int - number of IO buffers that were allocated and posted.
+ * 0 = failure, less than num_to_alloc is a partial failure.
+ **/
+int
+lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc)
+{
+ struct lpfc_io_buf *lpfc_ncmd;
+ struct lpfc_iocbq *pwqeq;
+ uint16_t iotag, lxri = 0;
+ int bcnt, num_posted;
+ LIST_HEAD(prep_nblist);
+ LIST_HEAD(post_nblist);
+ LIST_HEAD(nvme_nblist);
+
+ /* Sanity check to ensure our sizing is right for both SCSI and NVME */
+ if (sizeof(struct lpfc_io_buf) > LPFC_COMMON_IO_BUF_SZ) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+ "6426 Common buffer size %ld exceeds %d\n",
+ sizeof(struct lpfc_io_buf),
+ LPFC_COMMON_IO_BUF_SZ);
+ return 0;
+ }
+
+ phba->sli4_hba.io_xri_cnt = 0;
+ for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
+ lpfc_ncmd = kzalloc(LPFC_COMMON_IO_BUF_SZ, GFP_KERNEL);
+ if (!lpfc_ncmd)