From c0332694903a37cf8ecdc9102d5c9e09cf8643d0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 1 May 2017 08:58:49 -0700 Subject: block: Remove elevator_change() Since commit 84253394927c ("remove the mg_disk driver") removed the only caller of elevator_change(), also remove the elevator_change() function itself. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Markus Trippelsdorf Signed-off-by: Jens Axboe --- block/elevator.c | 13 ------------- include/linux/elevator.h | 1 - 2 files changed, 14 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index bf11e70f008b..80f485451096 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -1088,19 +1088,6 @@ static int __elevator_change(struct request_queue *q, const char *name) return elevator_switch(q, e); } -int elevator_change(struct request_queue *q, const char *name) -{ - int ret; - - /* Protect q->elevator from elevator_init() */ - mutex_lock(&q->sysfs_lock); - ret = __elevator_change(q, name); - mutex_unlock(&q->sysfs_lock); - - return ret; -} -EXPORT_SYMBOL(elevator_change); - static inline bool elv_support_iosched(struct request_queue *q) { if (q->mq_ops && q->tag_set && (q->tag_set->flags & diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 3a216318ae73..d44840368ee7 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -214,7 +214,6 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct request_queue *, struct elevator_queue *); -extern int elevator_change(struct request_queue *, const char *); extern bool elv_bio_merge_ok(struct request *, struct bio *); extern struct elevator_queue *elevator_alloc(struct request_queue *, struct elevator_type *); -- cgit v1.2.3 From 0f6422a2c57c6afcf66ead903dc3fa6641184aa4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 27 Apr 2017 16:29:26 -0600 Subject: mtip32xx: get rid of 'atomic' argument to mtip_exec_internal_command() All callers can safely block. Kill the atomic/block argument, and remove the argument from all callers. Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 148 +++++++++++--------------------------- 1 file changed, 43 insertions(+), 105 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 02804cc79d82..d81d797ee65d 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -609,11 +609,6 @@ static void mtip_completion(struct mtip_port *port, complete(waiting); } -static void mtip_null_completion(struct mtip_port *port, - int tag, struct mtip_cmd *command, int status) -{ -} - static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, dma_addr_t buffer_dma, unsigned int sectors); static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, @@ -1117,7 +1112,6 @@ static int mtip_exec_internal_command(struct mtip_port *port, dma_addr_t buffer, int buf_len, u32 opts, - gfp_t atomic, unsigned long timeout) { struct mtip_cmd_sg *command_sg; @@ -1146,30 +1140,22 @@ static int mtip_exec_internal_command(struct mtip_port *port, clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); - if (atomic == GFP_KERNEL) { - if (fis->command != ATA_CMD_STANDBYNOW1) { - /* wait for io to complete if non atomic */ - if (mtip_quiesce_io(port, - MTIP_QUIESCE_IO_TIMEOUT_MS, atomic) < 0) { - dev_warn(&dd->pdev->dev, - "Failed to quiesce IO\n"); - mtip_put_int_command(dd, int_cmd); - clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); - wake_up_interruptible(&port->svc_wait); - return -EBUSY; - } + if (fis->command != ATA_CMD_STANDBYNOW1) { + /* wait for io to complete if non atomic */ + if (mtip_quiesce_io(port, + MTIP_QUIESCE_IO_TIMEOUT_MS, GFP_KERNEL) < 0) { + dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); + mtip_put_int_command(dd, int_cmd); + clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); + wake_up_interruptible(&port->svc_wait); + return -EBUSY; } - - /* Set the completion function and data for the command. */ - int_cmd->comp_data = &wait; - int_cmd->comp_func = mtip_completion; - - } else { - /* Clear completion - we're going to poll */ - int_cmd->comp_data = NULL; - int_cmd->comp_func = mtip_null_completion; } + /* Set the completion function and data for the command. */ + int_cmd->comp_data = &wait; + int_cmd->comp_func = mtip_completion; + /* Copy the command to the command table */ memcpy(int_cmd->command, fis, fis_len*4); @@ -1198,81 +1184,41 @@ static int mtip_exec_internal_command(struct mtip_port *port, /* Issue the command to the hardware */ mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); - if (atomic == GFP_KERNEL) { - /* Wait for the command to complete or timeout. */ - if ((rv = wait_for_completion_interruptible_timeout( - &wait, - msecs_to_jiffies(timeout))) <= 0) { - - if (rv == -ERESTARTSYS) { /* interrupted */ - dev_err(&dd->pdev->dev, - "Internal command [%02X] was interrupted after %u ms\n", - fis->command, - jiffies_to_msecs(jiffies - start)); - rv = -EINTR; - goto exec_ic_exit; - } else if (rv == 0) /* timeout */ - dev_err(&dd->pdev->dev, - "Internal command did not complete [%02X] within timeout of %lu ms\n", - fis->command, timeout); - else - dev_err(&dd->pdev->dev, - "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n", - fis->command, rv, timeout); - - if (mtip_check_surprise_removal(dd->pdev) || - test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &dd->dd_flag)) { - dev_err(&dd->pdev->dev, - "Internal command [%02X] wait returned due to SR\n", - fis->command); - rv = -ENXIO; - goto exec_ic_exit; - } - mtip_device_reset(dd); /* recover from timeout issue */ - rv = -EAGAIN; + /* Wait for the command to complete or timeout. */ + rv = wait_for_completion_interruptible_timeout(&wait, + msecs_to_jiffies(timeout)); + if (rv <= 0) { + if (rv == -ERESTARTSYS) { /* interrupted */ + dev_err(&dd->pdev->dev, + "Internal command [%02X] was interrupted after %u ms\n", + fis->command, + jiffies_to_msecs(jiffies - start)); + rv = -EINTR; goto exec_ic_exit; - } - } else { - u32 hba_stat, port_stat; - - /* Spin for checking if command still outstanding */ - timeout = jiffies + msecs_to_jiffies(timeout); - while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL]) - & (1 << MTIP_TAG_INTERNAL)) - && time_before(jiffies, timeout)) { - if (mtip_check_surprise_removal(dd->pdev)) { - rv = -ENXIO; - goto exec_ic_exit; - } - if ((fis->command != ATA_CMD_STANDBYNOW1) && - test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &dd->dd_flag)) { - rv = -ENXIO; - goto exec_ic_exit; - } - port_stat = readl(port->mmio + PORT_IRQ_STAT); - if (!port_stat) - continue; + } else if (rv == 0) /* timeout */ + dev_err(&dd->pdev->dev, + "Internal command did not complete [%02X] within timeout of %lu ms\n", + fis->command, timeout); + else + dev_err(&dd->pdev->dev, + "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n", + fis->command, rv, timeout); - if (port_stat & PORT_IRQ_ERR) { - dev_err(&dd->pdev->dev, - "Internal command [%02X] failed\n", - fis->command); - mtip_device_reset(dd); - rv = -EIO; - goto exec_ic_exit; - } else { - writel(port_stat, port->mmio + PORT_IRQ_STAT); - hba_stat = readl(dd->mmio + HOST_IRQ_STAT); - if (hba_stat) - writel(hba_stat, - dd->mmio + HOST_IRQ_STAT); - } - break; + if (mtip_check_surprise_removal(dd->pdev) || + test_bit(MTIP_DDF_REMOVE_PENDING_BIT, + &dd->dd_flag)) { + dev_err(&dd->pdev->dev, + "Internal command [%02X] wait returned due to SR\n", + fis->command); + rv = -ENXIO; + goto exec_ic_exit; } + mtip_device_reset(dd); /* recover from timeout issue */ + rv = -EAGAIN; + goto exec_ic_exit; } + rv = 0; if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL)) { rv = -ENXIO; @@ -1391,7 +1337,6 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) port->identify_dma, sizeof(u16) * ATA_ID_WORDS, 0, - GFP_KERNEL, MTIP_INT_CMD_TIMEOUT_MS) < 0) { rv = -1; @@ -1477,7 +1422,6 @@ static int mtip_standby_immediate(struct mtip_port *port) 0, 0, 0, - GFP_ATOMIC, timeout); dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n", jiffies_to_msecs(jiffies - start)); @@ -1523,7 +1467,6 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, buffer_dma, sectors * ATA_SECT_SIZE, 0, - GFP_ATOMIC, MTIP_INT_CMD_TIMEOUT_MS); } @@ -1558,7 +1501,6 @@ static int mtip_get_smart_data(struct mtip_port *port, u8 *buffer, buffer_dma, ATA_SECT_SIZE, 0, - GFP_ATOMIC, 15000); } @@ -1686,7 +1628,6 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba, dma_addr, ATA_SECT_SIZE, 0, - GFP_KERNEL, MTIP_TRIM_TIMEOUT_MS) < 0) rv = -EIO; @@ -1850,7 +1791,6 @@ static int exec_drive_task(struct mtip_port *port, u8 *command) 0, 0, 0, - GFP_KERNEL, to) < 0) { return -1; } @@ -1946,7 +1886,6 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, (xfer_sz ? dma_addr : 0), (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0), 0, - GFP_KERNEL, to) < 0) { rv = -EFAULT; @@ -2189,7 +2128,6 @@ static int exec_drive_taskfile(struct driver_data *dd, dma_buffer, transfer_size, 0, - GFP_KERNEL, timeout) < 0) { err = -EIO; goto abort; -- cgit v1.2.3 From 8afdd94c74e416de74a8ee61d79e4bf93466420b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 27 Apr 2017 16:32:41 -0600 Subject: mtip32xx: kill atomic argument to mtip_quiesce_io() All callers now pass in GFP_KERNEL, get rid of the argument. Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index d81d797ee65d..36f3d34f2156 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -1035,14 +1035,12 @@ static bool mtip_pause_ncq(struct mtip_port *port, * * @port Pointer to port data structure * @timeout Max duration to wait (ms) - * @atomic gfp_t flag to indicate blockable context or not * * return value * 0 Success * -EBUSY Commands still active */ -static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout, - gfp_t atomic) +static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) { unsigned long to; unsigned int n; @@ -1053,18 +1051,12 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout, to = jiffies + msecs_to_jiffies(timeout); do { if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) && - test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags) && - atomic == GFP_KERNEL) { + test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { msleep(20); continue; /* svc thd is actively issuing commands */ } - if (atomic == GFP_KERNEL) - msleep(100); - else { - cpu_relax(); - udelay(100); - } + msleep(100); if (mtip_check_surprise_removal(port->dd->pdev)) goto err_fault; @@ -1142,8 +1134,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, if (fis->command != ATA_CMD_STANDBYNOW1) { /* wait for io to complete if non atomic */ - if (mtip_quiesce_io(port, - MTIP_QUIESCE_IO_TIMEOUT_MS, GFP_KERNEL) < 0) { + if (mtip_quiesce_io(port, MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) { dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); mtip_put_int_command(dd, int_cmd); clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -4106,8 +4097,7 @@ static int mtip_block_remove(struct driver_data *dd) * Explicitly wait here for IOs to quiesce, * as mtip_standby_drive usually won't wait for IOs. */ - if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS, - GFP_KERNEL)) + if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS)) mtip_standby_drive(dd); } else -- cgit v1.2.3 From baed548a98397f57a71d61917177f7d42ab17881 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Apr 2017 07:54:06 -0600 Subject: mtip32xx: abstract out "are any commands active" helper This is a prep patch for backoff in ->queue_rq() for non-ncq commands. Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 36f3d34f2156..aee94f260725 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -1030,6 +1030,22 @@ static bool mtip_pause_ncq(struct mtip_port *port, return false; } +static bool mtip_commands_active(struct mtip_port *port) +{ + unsigned int active; + unsigned int n; + + /* + * Ignore s_active bit 0 of array element 0. + * This bit will always be set + */ + active = readl(port->s_active[0]) & 0xFFFFFFFE; + for (n = 1; n < port->dd->slot_groups; n++) + active |= readl(port->s_active[n]); + + return active != 0; +} + /* * Wait for port to quiesce * @@ -1043,8 +1059,7 @@ static bool mtip_pause_ncq(struct mtip_port *port, static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) { unsigned long to; - unsigned int n; - unsigned int active = 1; + bool active = true; blk_mq_stop_hw_queues(port->dd->queue); @@ -1061,14 +1076,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) if (mtip_check_surprise_removal(port->dd->pdev)) goto err_fault; - /* - * Ignore s_active bit 0 of array element 0. - * This bit will always be set - */ - active = readl(port->s_active[0]) & 0xFFFFFFFE; - for (n = 1; n < port->dd->slot_groups; n++) - active |= readl(port->s_active[n]); - + active = mtip_commands_active(port); if (!active) break; } while (time_before(jiffies, to)); -- cgit v1.2.3 From 3f5e6a35774cd6bb4fd6b32edb4efd2a3f90e4dd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Apr 2017 10:45:08 -0600 Subject: mtip32xx: convert internal command issue to block IO path The driver special cases certain things for command issue, depending on whether it's an internal command or not. Make the internal commands use the regular infrastructure for issuing IO. Since this is an 8-group souped up AHCI variant, we have to deal with NCQ vs non-queueable commands. Do this from the queue_rq handler, by backing off unless the drive is idle. Reviewed-by: Christoph Hellwig Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 103 +++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 30 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index aee94f260725..9749b099a914 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -195,7 +195,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) if (mtip_check_surprise_removal(dd->pdev)) return NULL; - rq = blk_mq_alloc_request(dd->queue, 0, BLK_MQ_REQ_RESERVED); + rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED); if (IS_ERR(rq)) return NULL; @@ -1088,6 +1088,13 @@ err_fault: return -EFAULT; } +struct mtip_int_cmd { + int fis_len; + dma_addr_t buffer; + int buf_len; + u32 opts; +}; + /* * Execute an internal command and wait for the completion. * @@ -1114,10 +1121,16 @@ static int mtip_exec_internal_command(struct mtip_port *port, u32 opts, unsigned long timeout) { - struct mtip_cmd_sg *command_sg; DECLARE_COMPLETION_ONSTACK(wait); struct mtip_cmd *int_cmd; struct driver_data *dd = port->dd; + struct request *rq; + struct mtip_int_cmd icmd = { + .fis_len = fis_len, + .buffer = buffer, + .buf_len = buf_len, + .opts = opts + }; int rv = 0; unsigned long start; @@ -1132,6 +1145,8 @@ static int mtip_exec_internal_command(struct mtip_port *port, dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n"); return -EFAULT; } + rq = blk_mq_rq_from_pdu(int_cmd); + rq->end_io_data = &icmd; set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -1158,35 +1173,16 @@ static int mtip_exec_internal_command(struct mtip_port *port, /* Copy the command to the command table */ memcpy(int_cmd->command, fis, fis_len*4); - /* Populate the SG list */ - int_cmd->command_header->opts = - __force_bit2int cpu_to_le32(opts | fis_len); - if (buf_len) { - command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ; - - command_sg->info = - __force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF); - command_sg->dba = - __force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF); - command_sg->dba_upper = - __force_bit2int cpu_to_le32((buffer >> 16) >> 16); - - int_cmd->command_header->opts |= - __force_bit2int cpu_to_le32((1 << 16)); - } - - /* Populate the command header */ - int_cmd->command_header->byte_count = 0; - start = jiffies; + rq->timeout = timeout; - /* Issue the command to the hardware */ - mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); + /* insert request and run queue */ + blk_execute_rq_nowait(rq->q, NULL, rq, true, NULL); + + wait_for_completion(&wait); + rv = int_cmd->status; - /* Wait for the command to complete or timeout. */ - rv = wait_for_completion_interruptible_timeout(&wait, - msecs_to_jiffies(timeout)); - if (rv <= 0) { + if (rv < 0) { if (rv == -ERESTARTSYS) { /* interrupted */ dev_err(&dd->pdev->dev, "Internal command [%02X] was interrupted after %u ms\n", @@ -1217,7 +1213,6 @@ static int mtip_exec_internal_command(struct mtip_port *port, goto exec_ic_exit; } - rv = 0; if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL)) { rv = -ENXIO; @@ -3762,6 +3757,44 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx, return false; } +static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + struct driver_data *dd = hctx->queue->queuedata; + struct mtip_int_cmd *icmd = rq->end_io_data; + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct mtip_cmd_sg *command_sg; + + if (mtip_commands_active(dd->port)) + return BLK_MQ_RQ_QUEUE_BUSY; + + rq->end_io_data = NULL; + + /* Populate the SG list */ + cmd->command_header->opts = + __force_bit2int cpu_to_le32(icmd->opts | icmd->fis_len); + if (icmd->buf_len) { + command_sg = cmd->command + AHCI_CMD_TBL_HDR_SZ; + + command_sg->info = + __force_bit2int cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF); + command_sg->dba = + __force_bit2int cpu_to_le32(icmd->buffer & 0xFFFFFFFF); + command_sg->dba_upper = + __force_bit2int cpu_to_le32((icmd->buffer >> 16) >> 16); + + cmd->command_header->opts |= + __force_bit2int cpu_to_le32((1 << 16)); + } + + /* Populate the command header */ + cmd->command_header->byte_count = 0; + + blk_mq_start_request(rq); + mtip_issue_non_ncq_command(dd->port, rq->tag); + return BLK_MQ_RQ_QUEUE_OK; +} + static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -3770,6 +3803,9 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, mtip_init_cmd_header(rq); + if (blk_rq_is_passthrough(rq)) + return mtip_issue_reserved_cmd(hctx, rq); + if (unlikely(mtip_check_unal_depth(hctx, rq))) return BLK_MQ_RQ_QUEUE_BUSY; @@ -3825,8 +3861,14 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req, { struct driver_data *dd = req->q->queuedata; - if (reserved) + if (reserved) { + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); + + cmd->status = -ETIME; + if (cmd->comp_func) + cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, cmd, -ETIME); goto exit_handler; + } if (test_bit(req->tag, dd->port->cmds_to_issue)) goto exit_handler; @@ -4063,6 +4105,7 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) } else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) { cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); + cmd->status = -ENODEV; if (cmd->comp_func) cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, cmd, -ENODEV); -- cgit v1.2.3 From 9f2779bff2f178496fb00b89797734ee245d2c93 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Apr 2017 08:53:01 -0600 Subject: blk-mq-sched: remove hack that bypasses scheduler for reserved requests We have update the troublesome driver (mtip32xx) to deal with this appropriately. So kill the hack that bypassed scheduler allocation and insertion for reserved requests. Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Tested-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 8b361e192e8a..e79e9f18d7c2 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -82,11 +82,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q, if (likely(!data->hctx)) data->hctx = blk_mq_map_queue(q, data->ctx->cpu); - /* - * For a reserved tag, allocate a normal request since we might - * have driver dependencies on the value of the internal tag. - */ - if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) { + if (e) { data->flags |= BLK_MQ_REQ_INTERNAL; /* -- cgit v1.2.3 From a800ce8ba53da88571872cbccb0e2fff8e374752 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 27 Apr 2017 16:46:02 -0600 Subject: Revert "mtip32xx: pass BLK_MQ_F_NO_SCHED" This reverts commit 4981d04dd8f1ab19e2cce008da556d7f099b6e68. The driver has been converted to using the proper infrastructure for issuing internal commands. This means it's now safe to use with the scheduling infrastruture, so we can now revert the change that turned off scheduling for mtip32xx. Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 9749b099a914..9108be601a64 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3961,7 +3961,7 @@ static int mtip_block_initialize(struct driver_data *dd) dd->tags.reserved_tags = 1; dd->tags.cmd_size = sizeof(struct mtip_cmd); dd->tags.numa_node = dd->numa_node; - dd->tags.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_NO_SCHED; + dd->tags.flags = BLK_MQ_F_SHOULD_MERGE; dd->tags.driver_data = dd; dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS; -- cgit v1.2.3 From d6296d39e90c9075bc2fc15f1e86dac44930d4b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 May 2017 10:19:08 -0600 Subject: blk-mq: update ->init_request and ->exit_request prototypes Remove the request_idx parameter, which can't be used safely now that we support I/O schedulers with blk-mq. Except for a superflous check in mtip32xx it was unused anyway. Also pass the tag_set instead of just the driver data - this allows drivers to avoid some code duplication in a follow on cleanup. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 18 +++++------------- drivers/block/loop.c | 5 ++--- drivers/block/mtip32xx/mtip32xx.c | 20 ++++++-------------- drivers/block/nbd.c | 7 +++---- drivers/block/rbd.c | 5 ++--- drivers/block/virtio_blk.c | 7 +++---- drivers/md/dm-rq.c | 7 +++---- drivers/mtd/ubi/block.c | 7 +++---- drivers/nvme/host/fc.c | 20 +++++++++----------- drivers/nvme/host/pci.c | 15 +++++++-------- drivers/nvme/host/rdma.c | 28 ++++++++++++++-------------- drivers/nvme/target/loop.c | 17 +++++++++-------- drivers/scsi/scsi_lib.c | 13 ++++++------- include/linux/blk-mq.h | 4 ++-- 14 files changed, 74 insertions(+), 99 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index bf90684a007a..b81e4a7cd7f2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1655,8 +1655,7 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, if (!rq) continue; - set->ops->exit_request(set->driver_data, rq, - hctx_idx, i); + set->ops->exit_request(set, rq, hctx_idx); tags->static_rqs[i] = NULL; } } @@ -1787,8 +1786,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, tags->static_rqs[i] = rq; if (set->ops->init_request) { - if (set->ops->init_request(set->driver_data, - rq, hctx_idx, i, + if (set->ops->init_request(set, rq, hctx_idx, node)) { tags->static_rqs[i] = NULL; goto fail; @@ -1849,14 +1847,10 @@ static void blk_mq_exit_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { - unsigned flush_start_tag = set->queue_depth; - blk_mq_tag_idle(hctx); if (set->ops->exit_request) - set->ops->exit_request(set->driver_data, - hctx->fq->flush_rq, hctx_idx, - flush_start_tag + hctx_idx); + set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); blk_mq_sched_exit_hctx(q, hctx, hctx_idx); @@ -1889,7 +1883,6 @@ static int blk_mq_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) { int node; - unsigned flush_start_tag = set->queue_depth; node = hctx->numa_node; if (node == NUMA_NO_NODE) @@ -1933,9 +1926,8 @@ static int blk_mq_init_hctx(struct request_queue *q, goto sched_exit_hctx; if (set->ops->init_request && - set->ops->init_request(set->driver_data, - hctx->fq->flush_rq, hctx_idx, - flush_start_tag + hctx_idx, node)) + set->ops->init_request(set, hctx->fq->flush_rq, hctx_idx, + node)) goto free_fq; if (hctx->flags & BLK_MQ_F_BLOCKING) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 994403efee19..28d932906f24 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1697,9 +1697,8 @@ static void loop_queue_work(struct kthread_work *work) loop_handle_cmd(cmd); } -static int loop_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int loop_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 9108be601a64..96fe6500e941 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3818,10 +3818,10 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_MQ_RQ_QUEUE_ERROR; } -static void mtip_free_cmd(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx) +static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx) { - struct driver_data *dd = data; + struct driver_data *dd = set->driver_data; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); if (!cmd->command) @@ -3831,20 +3831,12 @@ static void mtip_free_cmd(void *data, struct request *rq, cmd->command, cmd->command_dma); } -static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, - unsigned int request_idx, unsigned int numa_node) +static int mtip_init_cmd(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct driver_data *dd = data; + struct driver_data *dd = set->driver_data; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - /* - * For flush requests, request_idx starts at the end of the - * tag space. Since we don't support FLUSH/FUA, simply return - * 0 as there's nothing to be done. - */ - if (request_idx >= MTIP_MAX_COMMAND_SLOTS) - return 0; - cmd->command = dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ, &cmd->command_dma, GFP_KERNEL); if (!cmd->command) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ac376b9b852d..6b98ec2a3824 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1396,12 +1396,11 @@ static void nbd_dbg_close(void) #endif -static int nbd_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); - cmd->nbd = data; + cmd->nbd = set->driver_data; return 0; } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 089ac4179919..3670e8dd03fe 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4307,9 +4307,8 @@ out: return ret; } -static int rbd_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int rbd_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { struct work_struct *work = blk_mq_rq_to_pdu(rq); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index f94614257462..94173de1efaa 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -573,11 +573,10 @@ static const struct device_attribute dev_attr_cache_type_rw = __ATTR(cache_type, S_IRUGO|S_IWUSR, virtblk_cache_type_show, virtblk_cache_type_store); -static int virtblk_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct virtio_blk *vblk = data; + struct virtio_blk *vblk = set->driver_data; struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); #ifdef CONFIG_VIRTIO_BLK_SCSI diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index bff7e3bdb4ed..522d4fa8db64 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -719,11 +719,10 @@ int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t) return 0; } -static int dm_mq_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - return __dm_rq_init_rq(data, rq); + return __dm_rq_init_rq(set->driver_data, rq); } static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 51f2be8889b5..5497e65439df 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -334,10 +334,9 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx, } -static int ubiblock_init_request(void *data, struct request *req, - unsigned int hctx_idx, - unsigned int request_idx, - unsigned int numa_node) +static int ubiblock_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { struct ubiblock_pdu *pdu = blk_mq_rq_to_pdu(req); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 4976db56e351..70e689bf1cad 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1172,12 +1172,12 @@ __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, } static void -nvme_fc_exit_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx) +nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); - return __nvme_fc_exit_request(data, op); + return __nvme_fc_exit_request(set->driver_data, op); } static int @@ -1434,11 +1434,10 @@ out_on_error: } static int -nvme_fc_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_fc_ctrl *ctrl = data; + struct nvme_fc_ctrl *ctrl = set->driver_data; struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1]; @@ -1446,11 +1445,10 @@ nvme_fc_init_request(void *data, struct request *rq, } static int -nvme_fc_init_admin_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_fc_ctrl *ctrl = data; + struct nvme_fc_ctrl *ctrl = set->driver_data; struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_queue *queue = &ctrl->queues[0]; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c8541c3dcd19..56a315bd4d96 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -356,11 +356,11 @@ static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_i nvmeq->tags = NULL; } -static int nvme_admin_init_request(void *data, struct request *req, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_admin_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { - struct nvme_dev *dev = data; + struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = dev->queues[0]; @@ -383,11 +383,10 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } -static int nvme_init_request(void *data, struct request *req, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, + unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_dev *dev = data; + struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 29cf88ac3f61..dd1c6deef82f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -315,16 +315,16 @@ static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl, DMA_TO_DEVICE); } -static void nvme_rdma_exit_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx) +static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx) { - return __nvme_rdma_exit_request(data, rq, hctx_idx + 1); + return __nvme_rdma_exit_request(set->driver_data, rq, hctx_idx + 1); } -static void nvme_rdma_exit_admin_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx) +static void nvme_rdma_exit_admin_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx) { - return __nvme_rdma_exit_request(data, rq, 0); + return __nvme_rdma_exit_request(set->driver_data, rq, 0); } static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl, @@ -358,18 +358,18 @@ out_free_qe: return -ENOMEM; } -static int nvme_rdma_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_rdma_init_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx, + unsigned int numa_node) { - return __nvme_rdma_init_request(data, rq, hctx_idx + 1); + return __nvme_rdma_init_request(set->driver_data, rq, hctx_idx + 1); } -static int nvme_rdma_init_admin_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_rdma_init_admin_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx, + unsigned int numa_node) { - return __nvme_rdma_init_request(data, rq, 0); + return __nvme_rdma_init_request(set->driver_data, rq, 0); } static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 304f1c87c160..feb497134aee 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -230,18 +230,19 @@ static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, return 0; } -static int nvme_loop_init_request(void *data, struct request *req, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_loop_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { - return nvme_loop_init_iod(data, blk_mq_rq_to_pdu(req), hctx_idx + 1); + return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), + hctx_idx + 1); } -static int nvme_loop_init_admin_request(void *data, struct request *req, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_loop_init_admin_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { - return nvme_loop_init_iod(data, blk_mq_rq_to_pdu(req), 0); + return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 0); } static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 1c3e87d6c48f..327b10206d63 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1999,11 +1999,10 @@ static enum blk_eh_timer_return scsi_timeout(struct request *req, return scsi_times_out(req); } -static int scsi_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int scsi_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct Scsi_Host *shost = data; + struct Scsi_Host *shost = set->driver_data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); cmd->sense_buffer = @@ -2014,10 +2013,10 @@ static int scsi_init_request(void *data, struct request *rq, return 0; } -static void scsi_exit_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx) +static void scsi_exit_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx) { - struct Scsi_Host *shost = data; + struct Scsi_Host *shost = set->driver_data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); scsi_free_sense_buffer(shost, cmd->sense_buffer); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f3e5e1de1bdb..a104832e7ae5 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -86,9 +86,9 @@ typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); -typedef int (init_request_fn)(void *, struct request *, unsigned int, +typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int, unsigned int); -typedef void (exit_request_fn)(void *, struct request *, unsigned int, +typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int); typedef int (reinit_request_fn)(void *, struct request *); -- cgit v1.2.3 From 7a148c2fcff83309748bfaafe121aa85b724624f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 2 May 2017 07:28:02 +0800 Subject: block: don't call blk_mq_quiesce_queue() after queue is frozen After queue is frozen, no request in this queue can be in use at all, so there can't be any .queue_rq() running on this queue. It isn't necessary to call blk_mq_quiesce_queue() any more, so remove it in both elevator_switch_mq() and blk_mq_update_nr_requests(). Cc: Bart Van Assche Signed-off-by: Ming Lei Fixed up the description a bit. Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 -- block/elevator.c | 3 --- 2 files changed, 5 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index b81e4a7cd7f2..e339247a2570 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2609,7 +2609,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) return -EINVAL; blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); ret = 0; queue_for_each_hw_ctx(q, hctx, i) { @@ -2635,7 +2634,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) q->nr_requests = nr; blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); return ret; } diff --git a/block/elevator.c b/block/elevator.c index 80f485451096..ab726a5c0bf6 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -950,7 +950,6 @@ static int elevator_switch_mq(struct request_queue *q, int ret; blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); if (q->elevator) { if (q->elevator->registered) @@ -978,9 +977,7 @@ static int elevator_switch_mq(struct request_queue *q, out: blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); return ret; - } /* -- cgit v1.2.3 From 994ff079e8f6399e1f8cd43141da0f79ce7a179a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 2 May 2017 09:53:04 -0600 Subject: mtip32xx: cleanup internal tag assumptions We don't decode the internal tag to the proper group or tag indx. This works fine because we have hard wired it as 0 for now, but could break if we get rid of that. Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 96fe6500e941..3204623f746a 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -847,16 +847,15 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) struct mtip_port *port = dd->port; struct mtip_cmd *cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); - if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && - (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL]) - & (1 << MTIP_TAG_INTERNAL))) { - if (cmd->comp_func) { - cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd, 0); - return; + if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && cmd) { + int group = MTIP_TAG_INDEX(MTIP_TAG_INTERNAL); + int status = readl(port->cmd_issue[group]); + + if (!(status & (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL)))) { + if (cmd->comp_func) + cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd, 0); } } - - return; } /* @@ -1213,8 +1212,8 @@ static int mtip_exec_internal_command(struct mtip_port *port, goto exec_ic_exit; } - if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) - & (1 << MTIP_TAG_INTERNAL)) { + if (readl(port->cmd_issue[MTIP_TAG_INDEX(MTIP_TAG_INTERNAL)]) + & (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL))) { rv = -ENXIO; if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) { mtip_device_reset(dd); -- cgit v1.2.3 From 6f63503c8af569fcd60bb27dfe740d13be0030f6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 2 May 2017 09:56:00 -0600 Subject: mtip32xx: convert internal commands to regular block infrastructure Get rid of the private end_io handlers and data, and just use the regular block IO path for these requests. This removes a lot of redundant code. Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 198 ++++++-------------------------------- drivers/block/mtip32xx/mtip32xx.h | 10 -- 2 files changed, 30 insertions(+), 178 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3204623f746a..3a779a4f5653 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -205,66 +205,12 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) return blk_mq_rq_to_pdu(rq); } -static void mtip_put_int_command(struct driver_data *dd, struct mtip_cmd *cmd) -{ - blk_put_request(blk_mq_rq_from_pdu(cmd)); -} - -/* - * Once we add support for one hctx per mtip group, this will change a bit - */ -static struct request *mtip_rq_from_tag(struct driver_data *dd, - unsigned int tag) -{ - struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0]; - - return blk_mq_tag_to_rq(hctx->tags, tag); -} - static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd, unsigned int tag) { - struct request *rq = mtip_rq_from_tag(dd, tag); - - return blk_mq_rq_to_pdu(rq); -} - -/* - * IO completion function. - * - * This completion function is called by the driver ISR when a - * command that was issued by the kernel completes. It first calls the - * asynchronous completion function which normally calls back into the block - * layer passing the asynchronous callback data, then unmaps the - * scatter list associated with the completed command, and finally - * clears the allocated bit associated with the completed command. - * - * @port Pointer to the port data structure. - * @tag Tag of the command. - * @data Pointer to driver_data. - * @status Completion status. - * - * return value - * None - */ -static void mtip_async_complete(struct mtip_port *port, - int tag, struct mtip_cmd *cmd, int status) -{ - struct driver_data *dd = port->dd; - struct request *rq; - - if (unlikely(!dd) || unlikely(!port)) - return; - - if (unlikely(status == PORT_IRQ_TF_ERR)) { - dev_warn(&port->dd->pdev->dev, - "Command tag %d failed due to TFE\n", tag); - } - - rq = mtip_rq_from_tag(dd, tag); + struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0]; - cmd->status = status; - blk_mq_complete_request(rq); + return blk_mq_rq_to_pdu(blk_mq_tag_to_rq(hctx->tags, tag)); } /* @@ -581,38 +527,19 @@ static void print_tags(struct driver_data *dd, "%d command(s) %s: tagmap [%s]", cnt, msg, tagmap); } -/* - * Internal command completion callback function. - * - * This function is normally called by the driver ISR when an internal - * command completed. This function signals the command completion by - * calling complete(). - * - * @port Pointer to the port data structure. - * @tag Tag of the command that has completed. - * @data Pointer to a completion structure. - * @status Completion status. - * - * return value - * None - */ -static void mtip_completion(struct mtip_port *port, - int tag, struct mtip_cmd *command, int status) -{ - struct completion *waiting = command->comp_data; - if (unlikely(status == PORT_IRQ_TF_ERR)) - dev_warn(&port->dd->pdev->dev, - "Internal command %d completed with TFE\n", tag); - - command->comp_func = NULL; - command->comp_data = NULL; - complete(waiting); -} - static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, dma_addr_t buffer_dma, unsigned int sectors); static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, struct smart_attr *attrib); + +static void mtip_complete_command(struct mtip_cmd *cmd, int status) +{ + struct request *req = blk_mq_rq_from_pdu(cmd); + + cmd->status = status; + blk_mq_complete_request(req); +} + /* * Handle an error. * @@ -641,11 +568,7 @@ static void mtip_handle_tfe(struct driver_data *dd) if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); - - if (cmd->comp_data && cmd->comp_func) { - cmd->comp_func(port, MTIP_TAG_INTERNAL, - cmd, PORT_IRQ_TF_ERR); - } + mtip_complete_command(cmd, -EIO); return; } @@ -672,19 +595,9 @@ static void mtip_handle_tfe(struct driver_data *dd) continue; cmd = mtip_cmd_from_tag(dd, tag); - if (likely(cmd->comp_func)) { - set_bit(tag, tagaccum); - cmd_cnt++; - cmd->comp_func(port, tag, cmd, 0); - } else { - dev_err(&port->dd->pdev->dev, - "Missing completion func for tag %d", - tag); - if (mtip_check_surprise_removal(dd->pdev)) { - /* don't proceed further */ - return; - } - } + mtip_complete_command(cmd, 0); + set_bit(tag, tagaccum); + cmd_cnt++; } } @@ -754,10 +667,7 @@ static void mtip_handle_tfe(struct driver_data *dd) tag, fail_reason != NULL ? fail_reason : "unknown"); - if (cmd->comp_func) { - cmd->comp_func(port, tag, - cmd, -ENODATA); - } + mtip_complete_command(cmd, -ENODATA); continue; } } @@ -780,12 +690,7 @@ static void mtip_handle_tfe(struct driver_data *dd) dev_warn(&port->dd->pdev->dev, "retiring tag %d\n", tag); - if (cmd->comp_func) - cmd->comp_func(port, tag, cmd, PORT_IRQ_TF_ERR); - else - dev_warn(&port->dd->pdev->dev, - "Bad completion for tag %d\n", - tag); + mtip_complete_command(cmd, -EIO); } } print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt); @@ -818,18 +723,7 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, continue; command = mtip_cmd_from_tag(dd, tag); - if (likely(command->comp_func)) - command->comp_func(port, tag, command, 0); - else { - dev_dbg(&dd->pdev->dev, - "Null completion for tag %d", - tag); - - if (mtip_check_surprise_removal( - dd->pdev)) { - return; - } - } + mtip_complete_command(command, 0); } completed >>= 1; } @@ -851,10 +745,8 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) int group = MTIP_TAG_INDEX(MTIP_TAG_INTERNAL); int status = readl(port->cmd_issue[group]); - if (!(status & (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL)))) { - if (cmd->comp_func) - cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd, 0); - } + if (!(status & (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL)))) + mtip_complete_command(cmd, 0); } } @@ -863,7 +755,6 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) */ static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat) { - if (unlikely(port_stat & PORT_IRQ_CONNECT)) { dev_warn(&dd->pdev->dev, "Clearing PxSERR.DIAG.x\n"); @@ -990,8 +881,7 @@ static irqreturn_t mtip_irq_handler(int irq, void *instance) static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) { - writel(1 << MTIP_TAG_BIT(tag), - port->cmd_issue[MTIP_TAG_INDEX(tag)]); + writel(1 << MTIP_TAG_BIT(tag), port->cmd_issue[MTIP_TAG_INDEX(tag)]); } static bool mtip_pause_ncq(struct mtip_port *port, @@ -1120,7 +1010,6 @@ static int mtip_exec_internal_command(struct mtip_port *port, u32 opts, unsigned long timeout) { - DECLARE_COMPLETION_ONSTACK(wait); struct mtip_cmd *int_cmd; struct driver_data *dd = port->dd; struct request *rq; @@ -1145,7 +1034,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, return -EFAULT; } rq = blk_mq_rq_from_pdu(int_cmd); - rq->end_io_data = &icmd; + rq->special = &icmd; set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -1158,17 +1047,13 @@ static int mtip_exec_internal_command(struct mtip_port *port, /* wait for io to complete if non atomic */ if (mtip_quiesce_io(port, MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) { dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); - mtip_put_int_command(dd, int_cmd); + blk_mq_free_request(rq); clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); wake_up_interruptible(&port->svc_wait); return -EBUSY; } } - /* Set the completion function and data for the command. */ - int_cmd->comp_data = &wait; - int_cmd->comp_func = mtip_completion; - /* Copy the command to the command table */ memcpy(int_cmd->command, fis, fis_len*4); @@ -1176,11 +1061,9 @@ static int mtip_exec_internal_command(struct mtip_port *port, rq->timeout = timeout; /* insert request and run queue */ - blk_execute_rq_nowait(rq->q, NULL, rq, true, NULL); + blk_execute_rq(rq->q, NULL, rq, true); - wait_for_completion(&wait); rv = int_cmd->status; - if (rv < 0) { if (rv == -ERESTARTSYS) { /* interrupted */ dev_err(&dd->pdev->dev, @@ -1222,7 +1105,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, } exec_ic_exit: /* Clear the allocated and active bits for the internal command. */ - mtip_put_int_command(dd, int_cmd); + blk_mq_free_request(rq); clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); if (rv >= 0 && mtip_pause_ncq(port, fis)) { /* NCQ paused */ @@ -2377,12 +2260,6 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq, (nents << 16) | 5 | AHCI_CMD_PREFETCH); command->command_header->byte_count = 0; - /* - * Set the completion function and data for the command - * within this layer. - */ - command->comp_data = dd; - command->comp_func = mtip_async_complete; command->direction = dma_dir; /* @@ -3760,15 +3637,13 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, struct request *rq) { struct driver_data *dd = hctx->queue->queuedata; - struct mtip_int_cmd *icmd = rq->end_io_data; + struct mtip_int_cmd *icmd = rq->special; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); struct mtip_cmd_sg *command_sg; if (mtip_commands_active(dd->port)) return BLK_MQ_RQ_QUEUE_BUSY; - rq->end_io_data = NULL; - /* Populate the SG list */ cmd->command_header->opts = __force_bit2int cpu_to_le32(icmd->opts | icmd->fis_len); @@ -3856,9 +3731,7 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req, struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); cmd->status = -ETIME; - if (cmd->comp_func) - cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, cmd, -ETIME); - goto exit_handler; + return BLK_EH_HANDLED; } if (test_bit(req->tag, dd->port->cmds_to_issue)) @@ -4086,21 +3959,10 @@ protocol_init_error: static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) { - struct driver_data *dd = (struct driver_data *)data; - struct mtip_cmd *cmd; - - if (likely(!reserv)) { - cmd = blk_mq_rq_to_pdu(rq); - cmd->status = -ENODEV; - blk_mq_complete_request(rq); - } else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) { + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); - cmd->status = -ENODEV; - if (cmd->comp_func) - cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, - cmd, -ENODEV); - } + cmd->status = -ENODEV; + blk_mq_complete_request(rq); } /* diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 57b41528a824..37b8e3e0bb78 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -333,16 +333,6 @@ struct mtip_cmd { dma_addr_t command_dma; /* corresponding physical address */ - void *comp_data; /* data passed to completion function comp_func() */ - /* - * Completion function called by the ISR upon completion of - * a command. - */ - void (*comp_func)(struct mtip_port *port, - int tag, - struct mtip_cmd *cmd, - int status); - int scatter_ents; /* Number of scatter list entries used */ int unaligned; /* command is unaligned on 4k boundary */ -- cgit v1.2.3 From 2719aa217e0d025dbfce74ac777815776ccec072 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 3 May 2017 11:08:14 -0600 Subject: blk-mq: don't use sync workqueue flushing from drivers A previous commit introduced the sync flush, which we need from internal callers like blk_mq_quiesce_queue(). However, we also call the stop helpers from drivers, particularly from ->queue_rq() when we have to stop processing for a bit. We can't block from those locations, and we don't have to guarantee that we're fully flushed. Fixes: 9f993737906b ("blk-mq: unify hctx delayed_run_work and run_work") Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index e339247a2570..dec70ca0aafd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -41,6 +41,7 @@ static LIST_HEAD(all_q_list); static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); +static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync); static int blk_mq_poll_stats_bkt(const struct request *rq) { @@ -166,7 +167,7 @@ void blk_mq_quiesce_queue(struct request_queue *q) unsigned int i; bool rcu = false; - blk_mq_stop_hw_queues(q); + __blk_mq_stop_hw_queues(q, true); queue_for_each_hw_ctx(q, hctx, i) { if (hctx->flags & BLK_MQ_F_BLOCKING) @@ -1218,20 +1219,34 @@ bool blk_mq_queue_stopped(struct request_queue *q) } EXPORT_SYMBOL(blk_mq_queue_stopped); -void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) +static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync) { - cancel_delayed_work_sync(&hctx->run_work); + if (sync) + cancel_delayed_work_sync(&hctx->run_work); + else + cancel_delayed_work(&hctx->run_work); + set_bit(BLK_MQ_S_STOPPED, &hctx->state); } + +void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) +{ + __blk_mq_stop_hw_queue(hctx, false); +} EXPORT_SYMBOL(blk_mq_stop_hw_queue); -void blk_mq_stop_hw_queues(struct request_queue *q) +void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync) { struct blk_mq_hw_ctx *hctx; int i; queue_for_each_hw_ctx(q, hctx, i) - blk_mq_stop_hw_queue(hctx); + __blk_mq_stop_hw_queue(hctx, sync); +} + +void blk_mq_stop_hw_queues(struct request_queue *q) +{ + __blk_mq_stop_hw_queues(q, false); } EXPORT_SYMBOL(blk_mq_stop_hw_queues); -- cgit v1.2.3 From 2e13f33a2464fc3aa7783022a90309cabeca8935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Wed, 3 May 2017 11:19:04 +0200 Subject: lightnvm: create cmd before allocating request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create nvme command before allocating a request using nvme_alloc_request, which uses the command direction. Up until now, the command has been zeroized, so all commands have been allocated as a read operation. Signed-off-by: Javier González Reviewed-by: Matias Bjørling Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/lightnvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index e4e4e60b1224..8c4adac6fafc 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -503,6 +503,8 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) if (!cmd) return -ENOMEM; + nvme_nvm_rqtocmd(rq, rqd, ns, cmd); + rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY); if (IS_ERR(rq)) { kfree(cmd); @@ -517,8 +519,6 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) rq->__data_len = 0; } - nvme_nvm_rqtocmd(rq, rqd, ns, cmd); - rq->end_io_data = rqd; blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io); -- cgit v1.2.3 From 507f7d68fe5c24973dcd6e48f011bdfbd2197f01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Wed, 3 May 2017 11:19:05 +0200 Subject: lightnvm: fix bad back free on error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Free memory correctly when an allocation fails on a loop and we free backwards previously successful allocations. Signed-off-by: Javier González Reviewed-by: Matias Bjørling Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 54a06c3a2b8c..6a4aa608ad95 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -74,7 +74,7 @@ static int nvm_reserve_luns(struct nvm_dev *dev, int lun_begin, int lun_end) return 0; err: - while (--i > lun_begin) + while (--i >= lun_begin) clear_bit(i, dev->lun_map); return -EBUSY; @@ -211,7 +211,7 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, return tgt_dev; err_ch: - while (--i > 0) + while (--i >= 0) kfree(dev_map->chnls[i].lun_offs); kfree(luns); err_luns: -- cgit v1.2.3 From eabe06595d62cfa9278e2cd012df614bc68a7042 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 May 2017 15:05:26 +0200 Subject: block/mq: Cure cpu hotplug lock inversion By poking at /debug/sched_features I triggered the following splat: [] ====================================================== [] WARNING: possible circular locking dependency detected [] 4.11.0-00873-g964c8b7-dirty #694 Not tainted [] ------------------------------------------------------ [] bash/2109 is trying to acquire lock: [] (cpu_hotplug_lock.rw_sem){++++++}, at: [] static_key_slow_dec+0x1b/0x50 [] [] but task is already holding lock: [] (&sb->s_type->i_mutex_key#4){+++++.}, at: [] sched_feat_write+0x86/0x170 [] [] which lock already depends on the new lock. [] [] [] the existing dependency chain (in reverse order) is: [] [] -> #2 (&sb->s_type->i_mutex_key#4){+++++.}: [] lock_acquire+0x100/0x210 [] down_write+0x28/0x60 [] start_creating+0x5e/0xf0 [] debugfs_create_dir+0x13/0x110 [] blk_mq_debugfs_register+0x21/0x70 [] blk_mq_register_dev+0x64/0xd0 [] blk_register_queue+0x6a/0x170 [] device_add_disk+0x22d/0x440 [] loop_add+0x1f3/0x280 [] loop_init+0x104/0x142 [] do_one_initcall+0x43/0x180 [] kernel_init_freeable+0x1de/0x266 [] kernel_init+0xe/0x100 [] ret_from_fork+0x31/0x40 [] [] -> #1 (all_q_mutex){+.+.+.}: [] lock_acquire+0x100/0x210 [] __mutex_lock+0x6c/0x960 [] mutex_lock_nested+0x1b/0x20 [] blk_mq_init_allocated_queue+0x37c/0x4e0 [] blk_mq_init_queue+0x3a/0x60 [] loop_add+0xe5/0x280 [] loop_init+0x104/0x142 [] do_one_initcall+0x43/0x180 [] kernel_init_freeable+0x1de/0x266 [] kernel_init+0xe/0x100 [] ret_from_fork+0x31/0x40 [] *** DEADLOCK *** [] [] 3 locks held by bash/2109: [] #0: (sb_writers#11){.+.+.+}, at: [] vfs_write+0x17d/0x1a0 [] #1: (debugfs_srcu){......}, at: [] full_proxy_write+0x5d/0xd0 [] #2: (&sb->s_type->i_mutex_key#4){+++++.}, at: [] sched_feat_write+0x86/0x170 [] [] stack backtrace: [] CPU: 9 PID: 2109 Comm: bash Not tainted 4.11.0-00873-g964c8b7-dirty #694 [] Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.02.0002.122320131210 12/23/2013 [] Call Trace: [] lock_acquire+0x100/0x210 [] get_online_cpus+0x2a/0x90 [] static_key_slow_dec+0x1b/0x50 [] static_key_disable+0x20/0x30 [] sched_feat_write+0x131/0x170 [] full_proxy_write+0x97/0xd0 [] __vfs_write+0x28/0x120 [] vfs_write+0xb5/0x1a0 [] SyS_write+0x49/0xa0 [] entry_SYSCALL_64_fastpath+0x23/0xc2 This is because of the cpu hotplug lock rework. Break the chain at #1 by reversing the lock acquisition order. This way i_mutex_key#4 no longer depends on cpu_hotplug_lock and things are good. Cc: Jens Axboe Signed-off-by: Peter Zijlstra (Intel) Signed-o