summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 10:45:01 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 10:45:01 -0800
commit275220f0fcff1adf28a717076e00f575edf05fda (patch)
treed249bccc80c64443dab211639050c4fb14332648
parentfe3c560b8a22cb28e54fe8950abef38e88d75831 (diff)
parent81c5e2ae33c4b19e53966b427e33646bf6811830 (diff)
Merge branch 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block: (43 commits) block: ensure that completion error gets properly traced blktrace: add missing probe argument to block_bio_complete block cfq: don't use atomic_t for cfq_group block cfq: don't use atomic_t for cfq_queue block: trace event block fix unassigned field block: add internal hd part table references block: fix accounting bug on cross partition merges kref: add kref_test_and_get bio-integrity: mark kintegrityd_wq highpri and CPU intensive block: make kblockd_workqueue smarter Revert "sd: implement sd_check_events()" block: Clean up exit_io_context() source code. Fix compile warnings due to missing removal of a 'ret' variable fs/block: type signature of major_to_index(int) to major_to_index(unsigned) block: convert !IS_ERR(p) && p to !IS_ERR_NOR_NULL(p) cfq-iosched: don't check cfqg in choose_service_tree() fs/splice: Pull buf->ops->confirm() from splice_from_pipe actors cdrom: export cdrom_check_events() sd: implement sd_check_events() sr: implement sr_check_events() ...
-rw-r--r--Documentation/cgroups/blkio-controller.txt27
-rw-r--r--block/blk-cgroup.c4
-rw-r--r--block/blk-core.c40
-rw-r--r--block/blk-ioc.c5
-rw-r--r--block/blk-merge.c3
-rw-r--r--block/cfq-iosched.c112
-rw-r--r--block/genhd.c550
-rw-r--r--block/ioctl.c5
-rw-r--r--drivers/block/drbd/drbd_int.h2
-rw-r--r--drivers/block/drbd/drbd_main.c7
-rw-r--r--drivers/block/drbd/drbd_nl.c103
-rw-r--r--drivers/block/loop.c6
-rw-r--r--drivers/block/pktcdvd.c22
-rw-r--r--drivers/cdrom/cdrom.c56
-rw-r--r--drivers/char/raw.c14
-rw-r--r--drivers/md/dm-table.c20
-rw-r--r--drivers/md/dm.c6
-rw-r--r--drivers/md/md.c16
-rw-r--r--drivers/mtd/devices/block2mtd.c10
-rw-r--r--drivers/s390/block/dasd_genhd.c2
-rw-r--r--drivers/scsi/scsi_lib.c13
-rw-r--r--drivers/scsi/sd.c10
-rw-r--r--drivers/scsi/sr.c174
-rw-r--r--drivers/scsi/sr.h3
-rw-r--r--drivers/scsi/sr_ioctl.c2
-rw-r--r--drivers/usb/gadget/storage_common.c7
-rw-r--r--fs/bio-integrity.c7
-rw-r--r--fs/block_dev.c741
-rw-r--r--fs/btrfs/volumes.c28
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/char_dev.c2
-rw-r--r--fs/ext3/super.c12
-rw-r--r--fs/ext4/super.c12
-rw-r--r--fs/gfs2/ops_fstype.c8
-rw-r--r--fs/jfs/jfs_logmgr.c17
-rw-r--r--fs/logfs/dev_bdev.c7
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/nilfs2/super.c8
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/partitions/check.c106
-rw-r--r--fs/reiserfs/journal.c21
-rw-r--r--fs/splice.c43
-rw-r--r--fs/super.c19
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c5
-rw-r--r--include/linux/blkdev.h5
-rw-r--r--include/linux/cdrom.h6
-rw-r--r--include/linux/fs.h26
-rw-r--r--include/linux/genhd.h45
-rw-r--r--include/scsi/scsi.h1
-rw-r--r--include/trace/events/block.h12
-rw-r--r--kernel/power/swap.c5
-rw-r--r--kernel/trace/blktrace.c37
-rw-r--r--mm/swapfile.c7
53 files changed, 1323 insertions, 1085 deletions
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index d6da611f8f63..4ed7b5ceeed2 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -89,6 +89,33 @@ Throttling/Upper Limit policy
Limits for writes can be put using blkio.write_bps_device file.
+Hierarchical Cgroups
+====================
+- Currently none of the IO control policy supports hierarhical groups. But
+ cgroup interface does allow creation of hierarhical cgroups and internally
+ IO policies treat them as flat hierarchy.
+
+ So this patch will allow creation of cgroup hierarhcy but at the backend
+ everything will be treated as flat. So if somebody created a hierarchy like
+ as follows.
+
+ root
+ / \
+ test1 test2
+ |
+ test3
+
+ CFQ and throttling will practically treat all groups at same level.
+
+ pivot
+ / | \ \
+ root test1 test2 test3
+
+ Down the line we can implement hierarchical accounting/control support
+ and also introduce a new cgroup file "use_hierarchy" which will control
+ whether cgroup hierarchy is viewed as flat or hierarchical by the policy..
+ This is how memory controller also has implemented the things.
+
Various user visible config options
===================================
CONFIG_BLK_CGROUP
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b1febd0f6d2a..455768a3eb9e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1452,10 +1452,6 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
goto done;
}
- /* Currently we do not support hierarchy deeper than two level (0,1) */
- if (parent != cgroup->top_cgroup)
- return ERR_PTR(-EPERM);
-
blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
if (!blkcg)
return ERR_PTR(-ENOMEM);
diff --git a/block/blk-core.c b/block/blk-core.c
index 4ce953f1b390..2f4002f79a24 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -33,7 +33,7 @@
#include "blk.h"
-EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
@@ -64,13 +64,27 @@ static void drive_stat_acct(struct request *rq, int new_io)
return;
cpu = part_stat_lock();
- part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
- if (!new_io)
+ if (!new_io) {
+ part = rq->part;
part_stat_inc(cpu, part, merges[rw]);
- else {
+ } else {
+ part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
+ if (!hd_struct_try_get(part)) {
+ /*
+ * The partition is already being removed,
+ * the request will be accounted on the disk only
+ *
+ * We take a reference on disk->part0 although that
+ * partition will never be deleted, so we can treat
+ * it as any other partition.
+ */
+ part = &rq->rq_disk->part0;
+ hd_struct_get(part);
+ }
part_round_stats(cpu, part);
part_inc_in_flight(part, rw);
+ rq->part = part;
}
part_stat_unlock();
@@ -128,6 +142,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->ref_count = 1;
rq->start_time = jiffies;
set_start_time_ns(rq);
+ rq->part = NULL;
}
EXPORT_SYMBOL(blk_rq_init);
@@ -1329,9 +1344,9 @@ static inline void blk_partition_remap(struct bio *bio)
bio->bi_sector += p->start_sect;
bio->bi_bdev = bdev->bd_contains;
- trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
- bdev->bd_dev,
- bio->bi_sector - p->start_sect);
+ trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
+ bdev->bd_dev,
+ bio->bi_sector - p->start_sect);
}
}
@@ -1500,7 +1515,7 @@ static inline void __generic_make_request(struct bio *bio)
goto end_io;
if (old_sector != -1)
- trace_block_remap(q, bio, old_dev, old_sector);
+ trace_block_bio_remap(q, bio, old_dev, old_sector);
old_sector = bio->bi_sector;
old_dev = bio->bi_bdev->bd_dev;
@@ -1776,7 +1791,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
int cpu;
cpu = part_stat_lock();
- part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+ part = req->part;
part_stat_add(cpu, part, sectors[rw], bytes >> 9);
part_stat_unlock();
}
@@ -1796,13 +1811,14 @@ static void blk_account_io_done(struct request *req)
int cpu;
cpu = part_stat_lock();
- part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+ part = req->part;
part_stat_inc(cpu, part, ios[rw]);
part_stat_add(cpu, part, ticks[rw], duration);
part_round_stats(cpu, part);
part_dec_in_flight(part, rw);
+ hd_struct_put(part);
part_stat_unlock();
}
}
@@ -2606,7 +2622,9 @@ int __init blk_dev_init(void)
BUILD_BUG_ON(__REQ_NR_BITS > 8 *
sizeof(((struct request *)0)->cmd_flags));
- kblockd_workqueue = create_workqueue("kblockd");
+ /* used for unplugging and affects IO latency/throughput - HIGHPRI */
+ kblockd_workqueue = alloc_workqueue("kblockd",
+ WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
if (!kblockd_workqueue)
panic("Failed to create kblockd\n");
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 3c7a339fe381..b791022beef3 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -64,7 +64,7 @@ static void cfq_exit(struct io_context *ioc)
rcu_read_unlock();
}
-/* Called by the exitting task */
+/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
struct io_context *ioc;
@@ -74,10 +74,9 @@ void exit_io_context(struct task_struct *task)
task->io_context = NULL;
task_unlock(task);
- if (atomic_dec_and_test(&ioc->nr_tasks)) {
+ if (atomic_dec_and_test(&ioc->nr_tasks))
cfq_exit(ioc);
- }
put_io_context(ioc);
}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 74bc4a768f32..ea85e20d5e94 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -351,11 +351,12 @@ static void blk_account_io_merge(struct request *req)
int cpu;
cpu = part_stat_lock();
- part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+ part = req->part;
part_round_stats(cpu, part);
part_dec_in_flight(part, rq_data_dir(req));
+ hd_struct_put(part);
part_stat_unlock();
}
}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 78ee4b1d4e85..8427697c5437 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,7 +87,6 @@ struct cfq_rb_root {
unsigned count;
unsigned total_weight;
u64 min_vdisktime;
- struct rb_node *active;
};
#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
.count = 0, .min_vdisktime = 0, }
@@ -97,7 +96,7 @@ struct cfq_rb_root {
*/
struct cfq_queue {
/* reference count */
- atomic_t ref;
+ int ref;
/* various state flags, see below */
unsigned int flags;
/* parent cfq_data */
@@ -180,7 +179,6 @@ struct cfq_group {
/* group service_tree key */
u64 vdisktime;
unsigned int weight;
- bool on_st;
/* number of cfqq currently on this group */
int nr_cfqq;
@@ -209,7 +207,7 @@ struct cfq_group {
struct blkio_group blkg;
#ifdef CONFIG_CFQ_GROUP_IOSCHED
struct hlist_node cfqd_node;
- atomic_t ref;
+ int ref;
#endif
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
@@ -563,11 +561,6 @@ static void update_min_vdisktime(struct cfq_rb_root *st)
u64 vdisktime = st->min_vdisktime;
struct cfq_group *cfqg;
- if (st->active) {
- cfqg = rb_entry_cfqg(st->active);
- vdisktime = cfqg->vdisktime;
- }
-
if (st->left) {
cfqg = rb_entry_cfqg(st->left);
vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
@@ -646,11 +639,11 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
static inline bool cfq_slice_used(struct cfq_queue *cfqq)
{
if (cfq_cfqq_slice_new(cfqq))
- return 0;
+ return false;
if (time_before(jiffies, cfqq->slice_end))
- return 0;
+ return false;
- return 1;
+ return true;
}
/*
@@ -869,7 +862,7 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
struct rb_node *n;
cfqg->nr_cfqq++;
- if (cfqg->on_st)
+ if (!RB_EMPTY_NODE(&cfqg->rb_node))
return;
/*
@@ -885,7 +878,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
cfqg->vdisktime = st->min_vdisktime;
__cfq_group_service_tree_add(st, cfqg);
- cfqg->on_st = true;
st->total_weight += cfqg->weight;
}
@@ -894,9 +886,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
struct cfq_rb_root *st = &cfqd->grp_service_tree;
- if (st->active == &cfqg->rb_node)
- st->active = NULL;
-
BUG_ON(cfqg->nr_cfqq < 1);
cfqg->nr_cfqq--;
@@ -905,7 +894,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
return;
cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
- cfqg->on_st = false;
st->total_weight -= cfqg->weight;
if (!RB_EMPTY_NODE(&cfqg->rb_node))
cfq_rb_erase(&cfqg->rb_node, st);
@@ -1026,7 +1014,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
* elevator which will be dropped by either elevator exit
* or cgroup deletion path depending on who is exiting first.
*/
- atomic_set(&cfqg->ref, 1);
+ cfqg->ref = 1;
/*
* Add group onto cgroup list. It might happen that bdi->dev is
@@ -1071,7 +1059,7 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
{
- atomic_inc(&cfqg->ref);
+ cfqg->ref++;
return cfqg;
}
@@ -1083,7 +1071,7 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
cfqq->cfqg = cfqg;
/* cfqq reference on cfqg */
- atomic_inc(&cfqq->cfqg->ref);
+ cfqq->cfqg->ref++;
}
static void cfq_put_cfqg(struct cfq_group *cfqg)
@@ -1091,11 +1079,12 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
struct cfq_rb_root *st;
int i, j;
- BUG_ON(atomic_read(&cfqg->ref) <= 0);
- if (!atomic_dec_and_test(&cfqg->ref))
+ BUG_ON(cfqg->ref <= 0);
+ cfqg->ref--;
+ if (cfqg->ref)
return;
for_each_cfqg_st(cfqg, i, j, st)
- BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
+ BUG_ON(!RB_EMPTY_ROOT(&st->rb));
kfree(cfqg);
}
@@ -1200,7 +1189,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
cfq_group_service_tree_del(cfqd, cfqq->cfqg);
cfqq->orig_cfqg = cfqq->cfqg;
cfqq->cfqg = &cfqd->root_group;
- atomic_inc(&cfqd->root_group.ref);
+ cfqd->root_group.ref++;
group_changed = 1;
} else if (!cfqd->cfq_group_isolation
&& cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
@@ -1687,9 +1676,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
if (cfqq == cfqd->active_queue)
cfqd->active_queue = NULL;
- if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
- cfqd->grp_service_tree.active = NULL;
-
if (cfqd->active_cic) {
put_io_context(cfqd->active_cic->ioc);
cfqd->active_cic = NULL;
@@ -1901,10 +1887,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* in their service tree.
*/
if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
- return 1;
+ return true;
cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
service_tree->count);
- return 0;
+ return false;
}
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -2040,7 +2026,7 @@ static int cfqq_process_refs(struct cfq_queue *cfqq)
int process_refs, io_refs;
io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
- process_refs = atomic_read(&cfqq->ref) - io_refs;
+ process_refs = cfqq->ref - io_refs;
BUG_ON(process_refs < 0);
return process_refs;
}
@@ -2080,10 +2066,10 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
*/
if (new_process_refs >= process_refs) {
cfqq->new_cfqq = new_cfqq;
- atomic_add(process_refs, &new_cfqq->ref);
+ new_cfqq->ref += process_refs;
} else {
new_cfqq->new_cfqq = cfqq;
- atomic_add(new_process_refs, &cfqq->ref);
+ cfqq->ref += new_process_refs;
}
}
@@ -2116,12 +2102,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
unsigned count;
struct cfq_rb_root *st;
unsigned group_slice;
-
- if (!cfqg) {
- cfqd->serving_prio = IDLE_WORKLOAD;
- cfqd->workload_expires = jiffies + 1;
- return;
- }
+ enum wl_prio_t original_prio = cfqd->serving_prio;
/* Choose next priority. RT > BE > IDLE */
if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -2134,6 +2115,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
return;
}
+ if (original_prio != cfqd->serving_prio)
+ goto new_workload;
+
/*
* For RT and BE, we have to choose also the type
* (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
@@ -2148,6 +2132,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
if (count && !time_after(jiffies, cfqd->workload_expires))
return;
+new_workload:
/* otherwise select new workload type */
cfqd->serving_type =
cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
@@ -2199,7 +2184,6 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
if (RB_EMPTY_ROOT(&st->rb))
return NULL;
cfqg = cfq_rb_first_group(st);
- st->active = &cfqg->rb_node;
update_min_vdisktime(st);
return cfqg;
}
@@ -2293,6 +2277,17 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
goto keep_queue;
}
+ /*
+ * This is a deep seek queue, but the device is much faster than
+ * the queue can deliver, don't idle
+ **/
+ if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
+ (cfq_cfqq_slice_new(cfqq) ||
+ (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
+ cfq_clear_cfqq_deep(cfqq);
+ cfq_clear_cfqq_idle_window(cfqq);
+ }
+
if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
cfqq = NULL;
goto keep_queue;
@@ -2367,12 +2362,12 @@ static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
{
/* the queue hasn't finished any request, can't estimate */
if (cfq_cfqq_slice_new(cfqq))
- return 1;
+ return true;
if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
cfqq->slice_end))
- return 1;
+ return true;
- return 0;
+ return false;
}
static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
@@ -2538,9 +2533,10 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
struct cfq_data *cfqd = cfqq->cfqd;
struct cfq_group *cfqg, *orig_cfqg;
- BUG_ON(atomic_read(&cfqq->ref) <= 0);
+ BUG_ON(cfqq->ref <= 0);
- if (!atomic_dec_and_test(&cfqq->ref))
+ cfqq->ref--;
+ if (cfqq->ref)
return;
cfq_log_cfqq(cfqd, cfqq, "put_queue");
@@ -2843,7 +2839,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
RB_CLEAR_NODE(&cfqq->p_node);
INIT_LIST_HEAD(&cfqq->fifo);
- atomic_set(&cfqq->ref, 0);
+ cfqq->ref = 0;
cfqq->cfqd = cfqd;
cfq_mark_cfqq_prio_changed(cfqq);
@@ -2979,11 +2975,11 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
* pin the queue now that it's allocated, scheduler exit will prune it
*/
if (!is_sync && !(*async_cfqq)) {
- atomic_inc(&cfqq->ref);
+ cfqq->ref++;
*async_cfqq = cfqq;
}
- atomic_inc(&cfqq->ref);
+ cfqq->ref++;
return cfqq;
}
@@ -3265,6 +3261,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
return true;
+ /* An idle queue should not be idle now for some reason */
+ if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
+ return true;
+
if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
return false;
@@ -3681,13 +3681,13 @@ new_queue:
}
cfqq->allocated[rw]++;
- atomic_inc(&cfqq->ref);
-
- spin_unlock_irqrestore(q->queue_lock, flags);
-
+ cfqq->ref++;
rq->elevator_private = cic;
rq->elevator_private2 = cfqq;
rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
+
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
return 0;
queue_fail:
@@ -3862,6 +3862,10 @@ static void *cfq_init_queue(struct request_queue *q)
if (!cfqd)
return NULL;
+ /*
+ * Don't need take queue_lock in the routine, since we are
+ * initializing the ioscheduler, and nobody is using cfqd
+ */
cfqd->cic_index = i;
/* Init root service tree */
@@ -3881,7 +3885,7 @@ static void *cfq_init_queue(struct request_queue *q)
* Take a reference to root group which we never drop. This is just
* to make sure that cfq_put_cfqg() does not try to kfree root group
*/
- atomic_set(&cfqg->ref, 1);
+ cfqg->ref = 1;
rcu_read_lock();
cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
(void *)cfqd, 0);
@@ -3901,7 +3905,7 @@ static void *cfq_init_queue(struct request_queue *q)
* will not attempt to free it.
*/
cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
- atomic_inc(&cfqd->oom_cfqq.ref);
+ cfqd->oom_cfqq.ref++;
cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
INIT_LIST_HEAD(&cfqd->cic_list);
diff --git a/block/genhd.c b/block/genhd.c
index 5fa2b44a72ff..6a5b772aa201 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -18,6 +18,7 @@
#include <linux/buffer_head.h>
#include <linux/mutex.h>
#include <linux/idr.h>
+#include <linux/log2.h>
#include "blk.h"
@@ -35,6 +36,10 @@ static DEFINE_IDR(ext_devt_idr);
static struct device_type disk_type;
+static void disk_add_events(struct gendisk *disk);
+static void disk_del_events(struct gendisk *disk);
+static void disk_release_events(struct gendisk *disk);
+
/**
* disk_get_part - get partition
* @disk: disk to look partition from
@@ -239,7 +244,7 @@ static struct blk_major_name {
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
/* index in the above - for now: assume no multimajor ranges */
-static inline int major_to_index(int major)
+static inline int major_to_index(unsigned major)
{
return major % BLKDEV_MAJOR_HASH_SIZE;
}
@@ -502,6 +507,64 @@ static int exact_lock(dev_t devt, void *data)
return 0;
}
+void register_disk(struct gendisk *disk)
+{
+ struct device *ddev = disk_to_dev(disk);
+ struct block_device *bdev;
+ struct disk_part_iter piter;
+ struct hd_struct *part;
+ int err;
+
+ ddev->parent = disk->driverfs_dev;
+
+ dev_set_name(ddev, disk->disk_name);
+
+ /* delay uevents, until we scanned partition table */
+ dev_set_uevent_suppress(ddev, 1);
+
+ if (device_add(ddev))
+ return;
+ if (!sysfs_deprecated) {
+ err = sysfs_create_link(block_depr, &ddev->kobj,
+ kobject_name(&ddev->kobj));
+ if (err) {
+ device_del(ddev);
+ return;
+ }
+ }
+ disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
+ disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
+
+ /* No minors to use for partitions */
+ if (!disk_partitionable(disk))
+ goto exit;
+
+ /* No such device (e.g., media were just removed) */
+ if (!get_capacity(disk))
+ goto exit;
+
+ bdev = bdget_disk(disk, 0);
+ if (!bdev)
+ goto exit;
+
+ bdev->bd_invalidated = 1;
+ err = blkdev_get(bdev, FMODE_READ, NULL);
+ if (err < 0)
+ goto exit;
+ blkdev_put(bdev, FMODE_READ);
+
+exit:
+ /* announce disk after possible partitions are created */
+ dev_set_uevent_suppress(ddev, 0);
+ kobject_uevent(&ddev->kobj, KOBJ_ADD);
+
+ /* announce possible partitions */
+ disk_part_iter_init(&piter, disk, 0);
+ while ((part = disk_part_iter_next(&piter)))
+ kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
+ disk_part_iter_exit(&piter);
+}
+
/**
* add_disk - add partitioning information to kernel list
* @disk: per-device partitioning information
@@ -551,18 +614,48 @@ void add_disk(struct gendisk *disk)
retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
"bdi");
WARN_ON(retval);
-}
+ disk_add_events(disk);
+}
EXPORT_SYMBOL(add_disk);
-EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
-void unlink_gendisk(struct gendisk *disk)
+void del_gendisk(struct gendisk *disk)
{
+ struct disk_part_iter piter;
+ struct hd_struct *part;
+
+ disk_del_events(disk);
+
+ /* invalidate stuff */
+ disk_part_iter_init(&piter, disk,
+ DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
+ while ((part = disk_part_iter_next(&piter))) {
+ invalidate_partition(disk, part->partno);
+ delete_partition(disk, part->partno);
+ }
+ disk_part_iter_exit(&piter);
+
+ invalidate_partition(disk, 0);
+ blk_free_devt(disk_to_dev(disk)->devt);