diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-13 14:22:26 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-13 14:22:26 -0800 |
commit | 9ea18f8cab5f1c36cdd0f09717e35ceb48c36a87 (patch) | |
tree | 0c8da7ac47cb59fe39f177ab0407f554aff77194 /drivers | |
parent | caf292ae5bb9d57198ce001d8b762f7abae3a94d (diff) | |
parent | 849c6e7746e4f6317ace6aa7d2fcdcd844e99ddb (diff) |
Merge branch 'for-3.19/drivers' of git://git.kernel.dk/linux-block
Pull block layer driver updates from Jens Axboe:
- NVMe updates:
  - The blk-mq conversion from Matias (and others)
  - A stack of NVMe bug fixes from the nvme tree, mostly from Keith.
  - Various bug fixes from me, fixing issues in both the blk-mq
    conversion and generic bugs.
  - Abort and CPU online fix from Sam.
  - Hot add/remove fix from Indraneel.
- A couple of drbd fixes from the drbd team (Andreas, Lars, Philipp)
- With the generic IO stat accounting from 3.19/core, converting md,
  bcache, and rsxx to use those. From Gu Zheng.
- Boundary check for queue/irq mode for null_blk from Matias. Fixes
  cases where invalid values could be given, causing the device to hang.
- The xen blkfront pull request, with two bug fixes from Vitaly.
* 'for-3.19/drivers' of git://git.kernel.dk/linux-block: (56 commits)
NVMe: fix race condition in nvme_submit_sync_cmd()
NVMe: fix retry/error logic in nvme_queue_rq()
NVMe: Fix FS mount issue (hot-remove followed by hot-add)
NVMe: fix error return checking from blk_mq_alloc_request()
NVMe: fix freeing of wrong request in abort path
xen/blkfront: remove redundant flush_op
xen/blkfront: improve protection against issuing unsupported REQ_FUA
NVMe: Fix command setup on IO retry
null_blk: boundary check queue_mode and irqmode
block/rsxx: use generic io stats accounting functions to simplify io stat accounting
md: use generic io stats accounting functions to simplify io stat accounting
drbd: use generic io stats accounting functions to simplify io stat accounting
md/bcache: use generic io stats accounting functions to simplify io stat accounting
NVMe: Update module version major number
NVMe: fail pci initialization if the device doesn't have any BARs
NVMe: add ->exit_hctx() hook
NVMe: make setup work for devices that don't do INTx
NVMe: enable IO stats by default
NVMe: nvme_submit_async_admin_req() must use atomic rq allocation
NVMe: replace blk_put_request() with blk_mq_free_request()
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 3 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 39 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 23 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 64 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 25 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.c | 42 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.h | 5 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 5 | ||||
-rw-r--r-- | drivers/block/null_blk.c | 42 | ||||
-rw-r--r-- | drivers/block/nvme-core.c | 1594 | ||||
-rw-r--r-- | drivers/block/nvme-scsi.c | 162 | ||||
-rw-r--r-- | drivers/block/rsxx/dev.c | 29 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 65 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 23 | ||||
-rw-r--r-- | drivers/md/dm.c | 13 | ||||
-rw-r--r-- | drivers/md/md.c | 6 |
17 files changed, 952 insertions, 1190 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index a2dfa169237d..1318e3217cb0 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -827,8 +827,7 @@ static int update_sync_bits(struct drbd_device *device, * */ int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, - enum update_sync_bits_mode mode, - const char *file, const unsigned int line) + enum update_sync_bits_mode mode) { /* Is called from worker and receiver context _only_ */ unsigned long sbnr, ebnr, lbnr; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9b22f8f01b57..b905e9888b88 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1454,7 +1454,6 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); /* drbd_nl.c */ -extern int drbd_msg_put_info(struct sk_buff *skb, const char *info); extern void drbd_suspend_io(struct drbd_device *device); extern void drbd_resume_io(struct drbd_device *device); extern char *ppsize(char *buf, unsigned long long size); @@ -1558,52 +1557,31 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed); extern int drbd_connected(struct drbd_peer_device *); -/* Yes, there is kernel_setsockopt, but only since 2.6.18. - * So we have our own copy of it here. 
*/ -static inline int drbd_setsockopt(struct socket *sock, int level, int optname, - char *optval, int optlen) -{ - mm_segment_t oldfs = get_fs(); - char __user *uoptval; - int err; - - uoptval = (char __user __force *)optval; - - set_fs(KERNEL_DS); - if (level == SOL_SOCKET) - err = sock_setsockopt(sock, level, optname, uoptval, optlen); - else - err = sock->ops->setsockopt(sock, level, optname, uoptval, - optlen); - set_fs(oldfs); - return err; -} - static inline void drbd_tcp_cork(struct socket *sock) { int val = 1; - (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, + (void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK, (char*)&val, sizeof(val)); } static inline void drbd_tcp_uncork(struct socket *sock) { int val = 0; - (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, + (void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK, (char*)&val, sizeof(val)); } static inline void drbd_tcp_nodelay(struct socket *sock) { int val = 1; - (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY, + (void) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char*)&val, sizeof(val)); } static inline void drbd_tcp_quickack(struct socket *sock) { int val = 2; - (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, + (void) kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char*)&val, sizeof(val)); } @@ -1662,14 +1640,13 @@ extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long stil enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC }; extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, - enum update_sync_bits_mode mode, - const char *file, const unsigned int line); + enum update_sync_bits_mode mode); #define drbd_set_in_sync(device, sector, size) \ - __drbd_change_sync(device, sector, size, SET_IN_SYNC, __FILE__, __LINE__) + __drbd_change_sync(device, sector, size, SET_IN_SYNC) #define drbd_set_out_of_sync(device, sector, size) \ - __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC, __FILE__, __LINE__) + 
__drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC) #define drbd_rs_failed_io(device, sector, size) \ - __drbd_change_sync(device, sector, size, RECORD_RS_FAILED, __FILE__, __LINE__) + __drbd_change_sync(device, sector, size, RECORD_RS_FAILED) extern void drbd_al_shrink(struct drbd_device *device); extern int drbd_initialize_al(struct drbd_device *, void *); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 973c185c9cfe..1fc83427199c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2532,10 +2532,6 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) return -ENOMEM; - /* - retcode = ERR_NOMEM; - drbd_msg_put_info("unable to allocate cpumask"); - */ /* silently ignore cpu mask on UP kernel */ if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) { @@ -2731,7 +2727,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig device = minor_to_device(minor); if (device) - return ERR_MINOR_EXISTS; + return ERR_MINOR_OR_VOLUME_EXISTS; /* GFP_KERNEL, we are outside of all write-out paths */ device = kzalloc(sizeof(struct drbd_device), GFP_KERNEL); @@ -2793,20 +2789,16 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig id = idr_alloc(&drbd_devices, device, minor, minor + 1, GFP_KERNEL); if (id < 0) { - if (id == -ENOSPC) { - err = ERR_MINOR_EXISTS; - drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already"); - } + if (id == -ENOSPC) + err = ERR_MINOR_OR_VOLUME_EXISTS; goto out_no_minor_idr; } kref_get(&device->kref); id = idr_alloc(&resource->devices, device, vnr, vnr + 1, GFP_KERNEL); if (id < 0) { - if (id == -ENOSPC) { - err = ERR_MINOR_EXISTS; - drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already"); - } + if (id == -ENOSPC) + err = ERR_MINOR_OR_VOLUME_EXISTS; goto out_idr_remove_minor; } kref_get(&device->kref); @@ -2825,10 
+2817,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig id = idr_alloc(&connection->peer_devices, peer_device, vnr, vnr + 1, GFP_KERNEL); if (id < 0) { - if (id == -ENOSPC) { + if (id == -ENOSPC) err = ERR_INVALID_REQUEST; - drbd_msg_put_info(adm_ctx->reply_skb, "requested volume exists already"); - } goto out_idr_remove_from_resource; } kref_get(&connection->kref); @@ -2836,7 +2826,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig if (init_submitter(device)) { err = ERR_NOMEM; - drbd_msg_put_info(adm_ctx->reply_skb, "unable to create submit workqueue"); goto out_idr_remove_vol; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 1cd47df44bda..74df8cfad414 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -92,7 +92,7 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only * reason it could fail was no space in skb, and there are 4k available. 
*/ -int drbd_msg_put_info(struct sk_buff *skb, const char *info) +static int drbd_msg_put_info(struct sk_buff *skb, const char *info) { struct nlattr *nla; int err = -EMSGSIZE; @@ -588,7 +588,7 @@ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int for val.i = 0; val.role = new_role; while (try++ < max_tries) { - rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE); + rv = _drbd_request_state_holding_state_mutex(device, mask, val, CS_WAIT_COMPLETE); /* in case we first succeeded to outdate, * but now suddenly could establish a connection */ @@ -2052,7 +2052,7 @@ check_net_options(struct drbd_connection *connection, struct net_conf *new_net_c rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf); rcu_read_unlock(); - /* connection->volumes protected by genl_lock() here */ + /* connection->peer_devices protected by genl_lock() here */ idr_for_each_entry(&connection->peer_devices, peer_device, i) { struct drbd_device *device = peer_device->device; if (!device->bitmap) { @@ -3483,7 +3483,7 @@ int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info) * that first_peer_device(device)->connection and device->vnr match the request. */ if (adm_ctx.device) { if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) - retcode = ERR_MINOR_EXISTS; + retcode = ERR_MINOR_OR_VOLUME_EXISTS; /* else: still NO_ERROR */ goto out; } @@ -3530,6 +3530,27 @@ out: return 0; } +static int adm_del_resource(struct drbd_resource *resource) +{ + struct drbd_connection *connection; + + for_each_connection(connection, resource) { + if (connection->cstate > C_STANDALONE) + return ERR_NET_CONFIGURED; + } + if (!idr_is_empty(&resource->devices)) + return ERR_RES_IN_USE; + + list_del_rcu(&resource->resources); + /* Make sure all threads have actually stopped: state handling only + * does drbd_thread_stop_nowait(). 
*/ + list_for_each_entry(connection, &resource->connections, connections) + drbd_thread_stop(&connection->worker); + synchronize_rcu(); + drbd_free_resource(resource); + return NO_ERROR; +} + int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) { struct drbd_config_context adm_ctx; @@ -3575,14 +3596,6 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) } } - /* If we reach this, all volumes (of this connection) are Secondary, - * Disconnected, Diskless, aka Unconfigured. Make sure all threads have - * actually stopped, state handling only does drbd_thread_stop_nowait(). */ - for_each_connection(connection, resource) - drbd_thread_stop(&connection->worker); - - /* Now, nothing can fail anymore */ - /* delete volumes */ idr_for_each_entry(&resource->devices, device, i) { retcode = adm_del_minor(device); @@ -3593,10 +3606,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) } } - list_del_rcu(&resource->resources); - synchronize_rcu(); - drbd_free_resource(resource); - retcode = NO_ERROR; + retcode = adm_del_resource(resource); out: mutex_unlock(&resource->adm_mutex); finish: @@ -3608,7 +3618,6 @@ int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) { struct drbd_config_context adm_ctx; struct drbd_resource *resource; - struct drbd_connection *connection; enum drbd_ret_code retcode; retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE); @@ -3616,27 +3625,10 @@ int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) return retcode; if (retcode != NO_ERROR) goto finish; - resource = adm_ctx.resource; - mutex_lock(&resource->adm_mutex); - for_each_connection(connection, resource) { - if (connection->cstate > C_STANDALONE) { - retcode = ERR_NET_CONFIGURED; - goto out; - } - } - if (!idr_is_empty(&resource->devices)) { - retcode = ERR_RES_IN_USE; - goto out; - } - list_del_rcu(&resource->resources); - for_each_connection(connection, resource) - drbd_thread_stop(&connection->worker); - 
synchronize_rcu(); - drbd_free_resource(resource); - retcode = NO_ERROR; -out: + mutex_lock(&resource->adm_mutex); + retcode = adm_del_resource(resource); mutex_unlock(&resource->adm_mutex); finish: drbd_adm_finish(&adm_ctx, info, retcode); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6960fb064731..d169b4a79267 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2482,7 +2482,7 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) atomic_read(&device->rs_sect_ev); if (atomic_read(&device->ap_actlog_cnt) - || !device->rs_last_events || curr_events - device->rs_last_events > 64) { + || curr_events - device->rs_last_events > 64) { unsigned long rs_left; int i; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 5a01c53dddeb..34f2f0ba409b 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -36,29 +36,15 @@ static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, /* Update disk stats at start of I/O request */ static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req) { - const int rw = bio_data_dir(req->master_bio); - int cpu; - cpu = part_stat_lock(); - part_round_stats(cpu, &device->vdisk->part0); - part_stat_inc(cpu, &device->vdisk->part0, ios[rw]); - part_stat_add(cpu, &device->vdisk->part0, sectors[rw], req->i.size >> 9); - (void) cpu; /* The macro invocations above want the cpu argument, I do not like - the compiler warning about cpu only assigned but never used... 
*/ - part_inc_in_flight(&device->vdisk->part0, rw); - part_stat_unlock(); + generic_start_io_acct(bio_data_dir(req->master_bio), req->i.size >> 9, + &device->vdisk->part0); } /* Update disk stats when completing request upwards */ static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req) { - int rw = bio_data_dir(req->master_bio); - unsigned long duration = jiffies - req->start_jif; - int cpu; - cpu = part_stat_lock(); - part_stat_add(cpu, &device->vdisk->part0, ticks[rw], duration); - part_round_stats(cpu, &device->vdisk->part0); - part_dec_in_flight(&device->vdisk->part0, rw); - part_stat_unlock(); + generic_end_io_acct(bio_data_dir(req->master_bio), + &device->vdisk->part0, req->start_jif); } static struct drbd_request *drbd_req_new(struct drbd_device *device, @@ -1545,6 +1531,7 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue; if (b->merge_bvec_fn) { + bvm->bi_bdev = device->ldev->backing_bdev; backing_limit = b->merge_bvec_fn(b, bvm, bvec); limit = min(limit, backing_limit); } @@ -1628,7 +1615,7 @@ void request_timer_fn(unsigned long data) time_after(now, req_peer->pre_send_jif + ent) && !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) { drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n"); - _drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); + _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD); } if (dt && oldest_submit_jif != now && time_after(now, oldest_submit_jif + dt) && diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 84b11f887d73..2d7dd269b6a8 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -215,6 +215,18 @@ static bool no_peer_wf_report_params(struct drbd_connection *connection) return rv; } +static void 
wake_up_all_devices(struct drbd_connection *connection) +{ + struct drbd_peer_device *peer_device; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) + wake_up(&peer_device->device->state_wait); + rcu_read_unlock(); + +} + /** * cl_wide_st_chg() - true if the state change is a cluster wide one @@ -410,6 +422,22 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask, return rv; } +enum drbd_state_rv +_drbd_request_state_holding_state_mutex(struct drbd_device *device, union drbd_state mask, + union drbd_state val, enum chg_state_flags f) +{ + enum drbd_state_rv rv; + + BUG_ON(f & CS_SERIALIZE); + + wait_event_cmd(device->state_wait, + (rv = drbd_req_state(device, mask, val, f)) != SS_IN_TRANSIENT_STATE, + mutex_unlock(device->state_mutex), + mutex_lock(device->state_mutex)); + + return rv; +} + static void print_st(struct drbd_device *device, const char *name, union drbd_state ns) { drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", @@ -629,14 +657,11 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_c if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) rv = SS_IN_TRANSIENT_STATE; - /* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) - rv = SS_IN_TRANSIENT_STATE; */ - /* While establishing a connection only allow cstate to change. - Delay/refuse role changes, detach attach etc... */ + Delay/refuse role changes, detach attach etc... 
(they do not touch cstate) */ if (test_bit(STATE_SENT, &connection->flags) && - !(os.conn == C_WF_REPORT_PARAMS || - (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION))) + !((ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION) || + (ns.conn >= C_CONNECTED && os.conn == C_WF_REPORT_PARAMS))) rv = SS_IN_TRANSIENT_STATE; if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) @@ -1032,8 +1057,10 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, /* Wake up role changes, that were delayed because of connection establishing */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS && - no_peer_wf_report_params(connection)) + no_peer_wf_report_params(connection)) { clear_bit(STATE_SENT, &connection->flags); + wake_up_all_devices(connection); + } wake_up(&device->misc_wait); wake_up(&device->state_wait); @@ -1072,7 +1099,6 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, set_ov_position(device, ns.conn); device->rs_start = now; - device->rs_last_events = 0; device->rs_last_sect_ev = 0; device->ov_last_oos_size = 0; device->ov_last_oos_start = 0; diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index cc41605ba21c..7f53c40823cd 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -117,6 +117,11 @@ extern enum drbd_state_rv _drbd_request_state(struct drbd_device *, union drbd_state, union drbd_state, enum chg_state_flags); + +extern enum drbd_state_rv +_drbd_request_state_holding_state_mutex(struct drbd_device *, union drbd_state, + union drbd_state, enum chg_state_flags); + extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state, enum chg_state_flags, struct completion *done); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d2d1f97511bd..d0fae55d871d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1592,11 +1592,15 @@ 
void drbd_resync_after_changed(struct drbd_device *device) void drbd_rs_controller_reset(struct drbd_device *device) { + struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; struct fifo_buffer *plan; atomic_set(&device->rs_sect_in, 0); atomic_set(&device->rs_sect_ev, 0); device->rs_in_flight = 0; + device->rs_last_events = + (int)part_stat_read(&disk->part0, sectors[0]) + + (int)part_stat_read(&disk->part0, sectors[1]); /* Updating the RCU protected object in place is necessary since this function gets called from atomic context. @@ -1743,7 +1747,6 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) device->rs_failed = 0; device->rs_paused = 0; device->rs_same_csum = 0; - device->rs_last_events = 0; device->rs_last_sect_ev = 0; device->rs_total = tw; device->rs_start = now; diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index caa61212fdb5..ae9f615382f6 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -78,7 +78,33 @@ module_param(home_node, int, S_IRUGO); MODULE_PARM_DESC(home_node, "Home node for the device"); static int queue_mode = NULL_Q_MQ; -module_param(queue_mode, int, S_IRUGO); + +static int null_param_store_val(const char *str, int *val, int min, int max) +{ + int ret, new_val; + + ret = kstrtoint(str, 10, &new_val); + if (ret) + return -EINVAL; + + if (new_val < min || new_val > max) + return -EINVAL; + + *val = new_val; + return 0; +} + +static int null_set_queue_mode(const char *str, const struct kernel_param *kp) +{ + return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ); +} + +static struct kernel_param_ops null_queue_mode_param_ops = { + .set = null_set_queue_mode, + .get = param_get_int, +}; + +device_param_cb(queue_mode, &null_queue_mode_param_ops, &queue_mode, S_IRUGO); MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)"); static int gb = 250; @@ -94,7 +120,19 @@ module_param(nr_devices, int, S_IRUGO); 
MODULE_PARM_DESC(nr_devices, "Number of devices to register"); static int irqmode = NULL_IRQ_SOFTIRQ; -module_param(irqmode, int, S_IRUGO); + +static int null_set_irqmode(const char *str, const struct kernel_param *kp) +{ + return null_param_store_val(str, &irqmode, NULL_IRQ_NONE, + NULL_IRQ_TIMER); +} + +static struct kernel_param_ops null_irqmode_param_ops = { + .set = null_set_irqmode, + .get = param_get_int, +}; + +device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO); MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); static int completion_nsec = 10000; diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e2bb8afbeae5..b1d5d8797315 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -13,9 +13,9 @@ */ #include <linux/nvme.h> -#include <linux/bio.h> #include <linux/bitops.h> #include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/cpu.h> #include <linux/delay.h> #include <linux/errno.h> @@ -33,7 +33,6 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/pci.h> -#include <linux/percpu.h> #include <linux/poison.h> #include <linux/ptrace.h> #include <linux/sched.h> @@ -42,12 +41,12 @@ #include <scsi/sg.h> #include <asm-generic/io-64-nonatomic-lo-hi.h> -#include <trace/events/block.h> - #define NVME_Q_DEPTH 1024 +#define NVME_AQ_DEPTH 64 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) #define ADMIN_TIMEOUT (admin_timeout * HZ) +#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ) #define IOD_TIMEOUT (retry_time * HZ) static unsigned char admin_timeout = 60; @@ -62,6 +61,10 @@ static unsigned char retry_time = 30; module_param(retry_time, byte, 0644); MODULE_PARM_DESC(retry_time, "time in seconds to retry failed I/O"); +static unsigned char shutdown_timeout = 5; +module_param(shutdown_timeout, byte, 0644); +MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller 
shutdown"); + static int nvme_major; module_param(nvme_major, int, 0); @@ -76,10 +79,12 @@ static wait_queue_head_t nvme_kthread_wait; static struct notifier_block nvme_nb; static void nvme_reset_failed_dev(struct work_struct *ws); +static int nvme_process_cq(struct nvme_queue *nvmeq); struct async_cmd_info { struct kthread_work work; struct kthread_worker *worker; + struct request *req; u32 result; int status; void *ctx; @@ -90,7 +95,7 @@ struct async_cmd_info { * commands and one for I/O commands). */ struct nvme_queue { - struct rcu_head r_head; + struct llist_node node; struct device *q_dmadev; struct nvme_dev *dev; char irqname[24]; /* nvme4294967295-65535\0 */ @@ -99,10 +104,6 @@ struct nvme_queue { volatile struct nvme_completion *cqes; dma_addr_t sq_dma_addr; dma_addr_t cq_dma_addr; - wait_queue_head_t sq_full; - wait_queue_t sq_cong_wait; - struct bio_list sq_cong; - struct list_head iod_bio; u32 __iomem *q_db; u16 q_depth; u16 cq_vector; @@ -112,10 +113,8 @@ struct nvme_queue { u16 qid; u8 cq_phase; u8 cqe_seen; - u8 q_suspended; - cpumask_var_t cpu_mask; struct async_cmd_info cmdinfo; - unsigned long cmdid_data[]; + struct blk_mq_hw_ctx *hctx; }; /* @@ -143,62 +142,79 @@ typedef void (*nvme_completion_fn)(struct nvme_queue *, void *, struct nvme_cmd_info { nvme_completion_fn fn; void *ctx; - unsigned long timeout; int aborted; + struct nvme_queue *nvmeq; }; -static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq) +static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int hctx_idx) { - return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)]; + struct nvme_dev *dev = data; + struct nvme_queue *nvmeq = dev->queues[0]; + + WARN_ON(nvmeq->hctx); + nvmeq->hctx = hctx; + hctx->driver_data = nvmeq; + return 0; } -static unsigned nvme_queue_extra(int depth) +static int nvme_admin_init_request(void *data, struct request *req, + unsigned int hctx_idx, unsigned int rq_idx, + unsigned int numa_node) { - return 
DIV_ROUND_UP(depth, 8) + (depth * sizeof(struct nvme_cmd_info)); + struct nvme_dev *dev = data; + struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); + struct nvme_queue *nvmeq = dev->queues[0]; + + BUG_ON(!nvmeq); + cmd->nvmeq = nvmeq; + return 0; } -/** - * alloc_cmdid() - Allocate a Command ID - * @nvmeq: The queue that will be used for this command - * @ctx: A pointer that will be passed to the handler - * @handler: The function to call on completion - * - * Allocate a Command ID for a queue. The data passed in will - * be passed to the completion handler. This is implemented by using - * the bottom two bits of the ctx pointer to store the handler ID. - * Passing in a pointer that's not 4-byte aligned will cause a BUG. - * We can change this if it becomes a problem. - * - * May be called with local interrupts disabled and the q_lock held, - * or with interrupts enabled and no locks held. - */ -static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx, - nvme_completion_fn handler, unsigned timeout) +static void nvme_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { - int depth = nvmeq->q_depth - 1; - struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); - int cmdid; + struct nvme_queue *nvmeq = hctx->driver_data; - do { - cmdid = find_first_zero_bit(nvmeq->cmdid_data, depth); - if (cmdid >= depth) - return -EBUSY; - } while (test_and_set_bit(cmdid, nvmeq->cmdid_data)); + nvmeq->hctx = NULL; +} + +static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int hctx_idx) +{ + struct nvme_dev *dev = data; + struct nvme_queue *nvmeq = dev->queues[ + (hctx_idx % dev->queue_count) + 1]; + + if (!nvmeq->hctx) + nvmeq->hctx = hctx; - info[cmdid].fn = handler; - info[cmdid].ctx = ctx; - info[cmdid].timeout = jiffies + timeout; - info[cmdid].aborted = 0; - return cmdid; + /* nvmeq queues are shared between namespaces. We assume here that + * blk-mq map the tags so they match up with the nvme queue tags. 
*/ + WARN_ON(nvmeq->hctx->tags != hctx->tags); + + hctx->driver_data = nvmeq; + return 0; } -static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx, - nvme_completion_fn handler, unsigned timeout) +static int nvme_init_request(void *data, struct request *req, + unsigned int hctx_idx, unsigned int rq_idx, + unsigned int numa_node) { - int cmdid; - wait_event_killable(nvmeq->sq_full, - (cmdid = alloc_cmdid(nvmeq, ctx, handler, timeout)) >= 0); - return (cmdid < 0) ? -EINTR : cmdid; + struct nvme_dev *dev = data; + struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); + struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; + + BUG_ON(!nvmeq); + cmd->nvmeq = nvmeq; + return 0; +} + +static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx, + nvme_completion_fn handler) +{ + cmd->fn = handler; + cmd->ctx = ctx; + cmd->aborted = 0; } /* Special values must be less than 0x1000 */ @@ -206,17 +222,12 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx, #define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE) #define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE) #define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE) -#define CMD_CTX_ABORT (0x318 + CMD_CTX_BASE) static void special_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_completion *cqe) { if (ctx == CMD_CTX_CANCELLED) return; - if (ctx == CMD_CTX_ABORT) { - ++nvmeq->dev->abort_limit; - return; - } if (ctx == CMD_CTX_COMPLETED) { dev_warn(nvmeq->q_dmadev, "completed id %d twice on queue %d\n", @@ -229,99 +240,89 @@ static void special_completion(struct nvme_queue *nvmeq, void *ctx, cqe->command_id, le16_to_cpup(&cqe->sq_id)); return; } - dev_warn(nvmeq->q_dmadev, "Unknown special completion %p\n", ctx); } -static void async_completion(struct nvme_queue *nvmeq, void *ctx, - struct nvme_completion *cqe) -{ - struct async_cmd_info *cmdinfo = ctx; - cmdinfo->result = le32_to_cpup(&cqe->result); - cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; - queue_kthread_work(cmdinfo->worker, 
&cmdinfo->work); -} - -/* - * Called with local interrupts disabled and the q_lock held. May not sleep. - */ -static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid, - nvme_completion_fn *fn) +static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn) { void *ctx; - struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); - if (cmdid >= nvmeq->q_depth || !info[cmdid].fn) { - if (fn) - *fn = special_completion; - return CMD_CTX_INVALID; - } if (fn) - *fn = info[cmdid].fn; - ctx = info[cmdid].ctx; - info[cmdid].fn = special_completion; - info[cmdid].ctx = CMD_CTX_COMPLETED; - clear_bit(cmdid, nvmeq->cmdid_data); - wake_up(&nvmeq->sq_full); + *fn = cmd->fn; + ctx = cmd->ctx; + cmd->fn = special_completion; + cmd->ctx = CMD_CTX_CANCELLED; return ctx; } -static void *cancel_cmdid(struct nvme_queue *nvmeq, int cmdid, - nvme_comple |