summaryrefslogtreecommitdiffstats
path: root/fs/block_dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r--fs/block_dev.c755
1 files changed, 265 insertions, 490 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9e84b1928b94..9e56ee1f2652 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -32,6 +32,7 @@
#include <linux/cleancache.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
+#include <linux/part_stat.h>
#include <linux/uaccess.h>
#include <linux/suspend.h>
#include "internal.h"
@@ -110,24 +111,20 @@ EXPORT_SYMBOL(invalidate_bdev);
int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
loff_t lstart, loff_t lend)
{
- struct block_device *claimed_bdev = NULL;
- int err;
-
/*
* If we don't hold exclusive handle for the device, upgrade to it
* while we discard the buffer cache to avoid discarding buffers
* under live filesystem.
*/
if (!(mode & FMODE_EXCL)) {
- claimed_bdev = bdev->bd_contains;
- err = bd_prepare_to_claim(bdev, claimed_bdev,
- truncate_bdev_range);
+ int err = bd_prepare_to_claim(bdev, truncate_bdev_range);
if (err)
return err;
}
+
truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
- if (claimed_bdev)
- bd_abort_claiming(bdev, claimed_bdev, truncate_bdev_range);
+ if (!(mode & FMODE_EXCL))
+ bd_abort_claiming(bdev, truncate_bdev_range);
return 0;
}
EXPORT_SYMBOL(truncate_bdev_range);
@@ -548,55 +545,47 @@ EXPORT_SYMBOL(fsync_bdev);
* count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
* actually.
*/
-struct super_block *freeze_bdev(struct block_device *bdev)
+int freeze_bdev(struct block_device *bdev)
{
struct super_block *sb;
int error = 0;
mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (++bdev->bd_fsfreeze_count > 1) {
- /*
- * We don't even need to grab a reference - the first call
- * to freeze_bdev grab an active reference and only the last
- * thaw_bdev drops it.
- */
- sb = get_super(bdev);
- if (sb)
- drop_super(sb);
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return sb;
- }
+ if (++bdev->bd_fsfreeze_count > 1)
+ goto done;
sb = get_active_super(bdev);
if (!sb)
- goto out;
+ goto sync;
if (sb->s_op->freeze_super)
error = sb->s_op->freeze_super(sb);
else
error = freeze_super(sb);
+ deactivate_super(sb);
+
if (error) {
- deactivate_super(sb);
bdev->bd_fsfreeze_count--;
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return ERR_PTR(error);
+ goto done;
}
- deactivate_super(sb);
- out:
+ bdev->bd_fsfreeze_sb = sb;
+
+sync:
sync_blockdev(bdev);
+done:
mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return sb; /* thaw_bdev releases s->s_umount */
+ return error;
}
EXPORT_SYMBOL(freeze_bdev);
/**
* thaw_bdev -- unlock filesystem
* @bdev: blockdevice to unlock
- * @sb: associated superblock
*
* Unlocks the filesystem and marks it writeable again after freeze_bdev().
*/
-int thaw_bdev(struct block_device *bdev, struct super_block *sb)
+int thaw_bdev(struct block_device *bdev)
{
+ struct super_block *sb;
int error = -EINVAL;
mutex_lock(&bdev->bd_fsfreeze_mutex);
@@ -607,6 +596,7 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
if (--bdev->bd_fsfreeze_count > 0)
goto out;
+ sb = bdev->bd_fsfreeze_sb;
if (!sb)
goto out;
@@ -792,23 +782,19 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
static void bdev_free_inode(struct inode *inode)
{
+ struct block_device *bdev = I_BDEV(inode);
+
+ free_percpu(bdev->bd_stats);
+ kfree(bdev->bd_meta_info);
+
kmem_cache_free(bdev_cachep, BDEV_I(inode));
}
-static void init_once(void *foo)
+static void init_once(void *data)
{
- struct bdev_inode *ei = (struct bdev_inode *) foo;
- struct block_device *bdev = &ei->bdev;
+ struct bdev_inode *ei = data;
- memset(bdev, 0, sizeof(*bdev));
- mutex_init(&bdev->bd_mutex);
-#ifdef CONFIG_SYSFS
- INIT_LIST_HEAD(&bdev->bd_holder_disks);
-#endif
- bdev->bd_bdi = &noop_backing_dev_info;
inode_init_once(&ei->vfs_inode);
- /* Initialize mutex for freeze. */
- mutex_init(&bdev->bd_fsfreeze_mutex);
}
static void bdev_evict_inode(struct inode *inode)
@@ -870,72 +856,72 @@ void __init bdev_cache_init(void)
blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
}
-/*
- * Most likely _very_ bad one - but then it's hardly critical for small
- * /dev and can be fixed when somebody will need really large one.
- * Keep in mind that it will be fed through icache hash function too.
- */
-static inline unsigned long hash(dev_t dev)
+struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
{
- return MAJOR(dev)+MINOR(dev);
-}
+ struct block_device *bdev;
+ struct inode *inode;
-static int bdev_test(struct inode *inode, void *data)
-{
- return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
+ inode = new_inode(blockdev_superblock);
+ if (!inode)
+ return NULL;
+ inode->i_mode = S_IFBLK;
+ inode->i_rdev = 0;
+ inode->i_data.a_ops = &def_blk_aops;
+ mapping_set_gfp_mask(&inode->i_data, GFP_USER);
+
+ bdev = I_BDEV(inode);
+ memset(bdev, 0, sizeof(*bdev));
+ mutex_init(&bdev->bd_mutex);
+ mutex_init(&bdev->bd_fsfreeze_mutex);
+ spin_lock_init(&bdev->bd_size_lock);
+ bdev->bd_disk = disk;
+ bdev->bd_partno = partno;
+ bdev->bd_inode = inode;
+ bdev->bd_bdi = &noop_backing_dev_info;
+#ifdef CONFIG_SYSFS
+ INIT_LIST_HEAD(&bdev->bd_holder_disks);
+#endif
+ bdev->bd_stats = alloc_percpu(struct disk_stats);
+ if (!bdev->bd_stats) {
+ iput(inode);
+ return NULL;
+ }
+ return bdev;
}
-static int bdev_set(struct inode *inode, void *data)
+void bdev_add(struct block_device *bdev, dev_t dev)
{
- BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
- return 0;
+ bdev->bd_dev = dev;
+ bdev->bd_inode->i_rdev = dev;
+ bdev->bd_inode->i_ino = dev;
+ insert_inode_hash(bdev->bd_inode);
}
static struct block_device *bdget(dev_t dev)
{
- struct block_device *bdev;
struct inode *inode;
- inode = iget5_locked(blockdev_superblock, hash(dev),
- bdev_test, bdev_set, &dev);
-
+ inode = ilookup(blockdev_superblock, dev);
if (!inode)
return NULL;
-
- bdev = &BDEV_I(inode)->bdev;
-
- if (inode->i_state & I_NEW) {
- spin_lock_init(&bdev->bd_size_lock);
- bdev->bd_contains = NULL;
- bdev->bd_super = NULL;
- bdev->bd_inode = inode;
- bdev->bd_part_count = 0;
- inode->i_mode = S_IFBLK;
- inode->i_rdev = dev;
- inode->i_bdev = bdev;
- inode->i_data.a_ops = &def_blk_aops;
- mapping_set_gfp_mask(&inode->i_data, GFP_USER);
- unlock_new_inode(inode);
- }
- return bdev;
+ return &BDEV_I(inode)->bdev;
}
/**
* bdgrab -- Grab a reference to an already referenced block device
* @bdev: Block device to grab a reference to.
+ *
+ * Returns the block_device with an additional reference when successful,
+ * or NULL if the inode is already being freed.
*/
struct block_device *bdgrab(struct block_device *bdev)
{
- ihold(bdev->bd_inode);
+ if (!igrab(bdev->bd_inode))
+ return NULL;
return bdev;
}
EXPORT_SYMBOL(bdgrab);
-struct block_device *bdget_part(struct hd_struct *part)
-{
- return bdget(part_devt(part));
-}
-
long nr_blockdev_pages(void)
{
struct inode *inode;
@@ -953,67 +939,8 @@ void bdput(struct block_device *bdev)
{
iput(bdev->bd_inode);
}
-
EXPORT_SYMBOL(bdput);
-static struct block_device *bd_acquire(struct inode *inode)
-{
- struct block_device *bdev;
-
- spin_lock(&bdev_lock);
- bdev = inode->i_bdev;
- if (bdev && !inode_unhashed(bdev->bd_inode)) {
- bdgrab(bdev);
- spin_unlock(&bdev_lock);
- return bdev;
- }
- spin_unlock(&bdev_lock);
-
- /*
- * i_bdev references block device inode that was already shut down
- * (corresponding device got removed). Remove the reference and look
- * up block device inode again just in case new device got
- * reestablished under the same device number.
- */
- if (bdev)
- bd_forget(inode);
-
- bdev = bdget(inode->i_rdev);
- if (bdev) {
- spin_lock(&bdev_lock);
- if (!inode->i_bdev) {
- /*
- * We take an additional reference to bd_inode,
- * and it's released in clear_inode() of inode.
- * So, we can access it via ->i_mapping always
- * without igrab().
- */
- bdgrab(bdev);
- inode->i_bdev = bdev;
- inode->i_mapping = bdev->bd_inode->i_mapping;
- }
- spin_unlock(&bdev_lock);
- }
- return bdev;
-}
-
-/* Call when you free inode */
-
-void bd_forget(struct inode *inode)
-{
- struct block_device *bdev = NULL;
-
- spin_lock(&bdev_lock);
- if (!sb_is_blkdev_sb(inode->i_sb))
- bdev = inode->i_bdev;
- inode->i_bdev = NULL;
- inode->i_mapping = &inode->i_data;
- spin_unlock(&bdev_lock);
-
- if (bdev)
- bdput(bdev);
-}
-
/**
* bd_may_claim - test whether a block device can be claimed
* @bdev: block device of interest
@@ -1049,7 +976,6 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
/**
* bd_prepare_to_claim - claim a block device
* @bdev: block device of interest
- * @whole: the whole device containing @bdev, may equal @bdev
* @holder: holder trying to claim @bdev
*
* Claim @bdev. This function fails if @bdev is already claimed by another
@@ -1059,9 +985,12 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
* RETURNS:
* 0 if @bdev can be claimed, -EBUSY otherwise.
*/
-int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole,
- void *holder)
+int bd_prepare_to_claim(struct block_device *bdev, void *holder)
{
+ struct block_device *whole = bdev_whole(bdev);
+
+ if (WARN_ON_ONCE(!holder))
+ return -EINVAL;
retry:
spin_lock(&bdev_lock);
/* if someone else claimed, fail */
@@ -1089,27 +1018,6 @@ retry:
}
EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
-static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
-{
- struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
-
- if (!disk)
- return NULL;
- /*
- * Now that we hold gendisk reference we make sure bdev we looked up is
- * not stale. If it is, it means device got removed and created before
- * we looked up gendisk and we fail open in such case. Associating
- * unhashed bdev with newly created gendisk could lead to two bdevs
- * (and thus two independent caches) being associated with one device
- * which is bad.
- */
- if (inode_unhashed(bdev->bd_inode)) {
- put_disk_and_module(disk);
- return NULL;
- }
- return disk;
-}
-
static void bd_clear_claiming(struct block_device *whole, void *holder)
{
lockdep_assert_held(&bdev_lock);
@@ -1122,15 +1030,15 @@ static void bd_clear_claiming(struct block_device *whole, void *holder)
/**
* bd_finish_claiming - finish claiming of a block device
* @bdev: block device of interest
- * @whole: whole block device
* @holder: holder that has claimed @bdev
*
* Finish exclusive open of a block device. Mark the device as exclusively
* open by the holder and wake up all waiters for exclusive open to finish.
*/
-static void bd_finish_claiming(struct block_device *bdev,
- struct block_device *whole, void *holder)
+static void bd_finish_claiming(struct block_device *bdev, void *holder)
{
+ struct block_device *whole = bdev_whole(bdev);
+
spin_lock(&bdev_lock);
BUG_ON(!bd_may_claim(bdev, whole, holder));
/*
@@ -1155,11 +1063,10 @@ static void bd_finish_claiming(struct block_device *bdev,
* also used when exclusive open is not actually desired and we just needed
* to block other exclusive openers for a while.
*/
-void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
- void *holder)
+void bd_abort_claiming(struct block_device *bdev, void *holder)
{
spin_lock(&bdev_lock);
- bd_clear_claiming(whole, holder);
+ bd_clear_claiming(bdev_whole(bdev), holder);
spin_unlock(&bdev_lock);
}
EXPORT_SYMBOL(bd_abort_claiming);
@@ -1230,7 +1137,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
WARN_ON_ONCE(!bdev->bd_holder);
/* FIXME: remove the following once add_disk() handles errors */
- if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
+ if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir))
goto out_unlock;
holder = bd_find_holder_disk(bdev, disk);
@@ -1249,24 +1156,24 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
holder->disk = disk;
holder->refcnt = 1;
- ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
if (ret)
goto out_free;
- ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
+ ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
if (ret)
goto out_del;
/*
* bdev could be deleted beneath us which would implicitly destroy
* the holder directory. Hold on to it.
*/
- kobject_get(bdev->bd_part->holder_dir);
+ kobject_get(bdev->bd_holder_dir);
list_add(&holder->list, &bdev->bd_holder_disks);
goto out_unlock;
out_del:
- del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ del_symlink(disk->slave_dir, bdev_kobj(bdev));
out_free:
kfree(holder);
out_unlock:
@@ -1294,10 +1201,9 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
holder = bd_find_holder_disk(bdev, disk);
if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
- del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
- del_symlink(bdev->bd_part->holder_dir,
- &disk_to_dev(disk)->kobj);
- kobject_put(bdev->bd_part->holder_dir);
+ del_symlink(disk->slave_dir, bdev_kobj(bdev));
+ del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
+ kobject_put(bdev->bd_holder_dir);
list_del_init(&holder->list);
kfree(holder);
}
@@ -1307,77 +1213,6 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif
-/**
- * check_disk_size_change - checks for disk size change and adjusts bdev size.
- * @disk: struct gendisk to check
- * @bdev: struct bdev to adjust.
- * @verbose: if %true log a message about a size change if there is any
- *
- * This routine checks to see if the bdev size does not match the disk size
- * and adjusts it if it differs. When shrinking the bdev size, its all caches
- * are freed.
- */
-static void check_disk_size_change(struct gendisk *disk,
- struct block_device *bdev, bool verbose)
-{
- loff_t disk_size, bdev_size;
-
- spin_lock(&bdev->bd_size_lock);
- disk_size = (loff_t)get_capacity(disk) << 9;
- bdev_size = i_size_read(bdev->bd_inode);
- if (disk_size != bdev_size) {
- if (verbose) {
- printk(KERN_INFO
- "%s: detected capacity change from %lld to %lld\n",
- disk->disk_name, bdev_size, disk_size);
- }
- i_size_write(bdev->bd_inode, disk_size);
- }
- spin_unlock(&bdev->bd_size_lock);
-
- if (bdev_size > disk_size) {
- if (__invalidate_device(bdev, false))
- pr_warn("VFS: busy inodes on resized disk %s\n",
- disk->disk_name);
- }
-}
-
-/**
- * revalidate_disk_size - checks for disk size change and adjusts bdev size.
- * @disk: struct gendisk to check
- * @verbose: if %true log a message about a size change if there is any
- *
- * This routine checks to see if the bdev size does not match the disk size
- * and adjusts it if it differs. When shrinking the bdev size, its all caches
- * are freed.
- */
-void revalidate_disk_size(struct gendisk *disk, bool verbose)
-{
- struct block_device *bdev;
-
- /*
- * Hidden disks don't have associated bdev so there's no point in
- * revalidating them.
- */
- if (disk->flags & GENHD_FL_HIDDEN)
- return;
-
- bdev = bdget_disk(disk, 0);
- if (bdev) {
- check_disk_size_change(disk, bdev, verbose);
- bdput(bdev);
- }
-}
-EXPORT_SYMBOL(revalidate_disk_size);
-
-void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors)
-{
- spin_lock(&bdev->bd_size_lock);
- i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
- spin_unlock(&bdev->bd_size_lock);
-}
-EXPORT_SYMBOL(bd_set_nr_sectors);
-
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
int bdev_disk_changed(struct block_device *bdev, bool invalidate)
@@ -1411,8 +1246,6 @@ rescan:
disk->fops->revalidate_disk(disk);
}
- check_disk_size_change(disk, bdev, !invalidate);
-
if (get_capacity(disk)) {
ret = blk_add_partitions(disk, bdev);
if (ret == -EAGAIN)
@@ -1439,71 +1272,19 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
* mutex_lock(part->bd_mutex)
* mutex_lock_nested(whole->bd_mutex, 1)
*/
-
-static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
- int for_part)
+static int __blkdev_get(struct block_device *bdev, fmode_t mode)
{
- struct block_device *whole = NULL, *claiming = NULL;
- struct gendisk *disk;
- int ret;
- int partno;
- bool first_open = false, unblock_events = true, need_restart;
-
- restart:
- need_restart = false;
- ret = -ENXIO;
- disk = bdev_get_gendisk(bdev, &partno);
- if (!disk)
- goto out;
-
- if (partno) {
- whole = bdget_disk(disk, 0);
- if (!whole) {
- ret = -ENOMEM;
- goto out_put_disk;
- }
- }
-
- if (!for_part && (mode & FMODE_EXCL)) {
- WARN_ON_ONCE(!holder);
- if (whole)
- claiming = whole;
- else
- claiming = bdev;
- ret = bd_prepare_to_claim(bdev, claiming, holder);
- if (ret)
- goto out_put_whole;
- }
+ struct gendisk *disk = bdev->bd_disk;
+ int ret = 0;
- disk_block_events(disk);
- mutex_lock_nested(&bdev->bd_mutex, for_part);
if (!bdev->bd_openers) {
- first_open = true;
- bdev->bd_disk = disk;
- bdev->bd_contains = bdev;
- bdev->bd_partno = partno;
-
- if (!partno) {
- ret = -ENXIO;
- bdev->bd_part = disk_get_part(disk, partno);
- if (!bdev->bd_part)
- goto out_clear;
-
+ if (!bdev_is_partition(bdev)) {
ret = 0;
- if (disk->fops->open) {
+ if (disk->fops->open)
ret = disk->fops->open(bdev, mode);
- /*
- * If we lost a race with 'disk' being deleted,
- * try again. See md.c
- */
- if (ret == -ERESTARTSYS)
- need_restart = true;
- }
- if (!ret) {
- bd_set_nr_sectors(bdev, get_capacity(disk));
+ if (!ret)
set_init_blocksize(bdev);
- }
/*
* If the device is invalidated, rescan partition
@@ -1516,28 +1297,33 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
- goto out_clear;
+ return ret;
} else {
- BUG_ON(for_part);
- ret = __blkdev_get(whole, mode, NULL, 1);
- if (ret)
- goto out_clear;
- bdev->bd_contains = bdgrab(whole);
- bdev->bd_part = disk_get_part(disk, partno);
+ struct block_device *whole = bdgrab(disk->part0);
+
+ mutex_lock_nested(&whole->bd_mutex, 1);
+ ret = __blkdev_get(whole, mode);
+ if (ret) {
+ mutex_unlock(&whole->bd_mutex);
+ bdput(whole);
+ return ret;
+ }
+ whole->bd_part_count++;
+ mutex_unlock(&whole->bd_mutex);
+
if (!(disk->flags & GENHD_FL_UP) ||
- !bdev->bd_part || !bdev->bd_part->nr_sects) {
- ret = -ENXIO;
- goto out_clear;
+ !bdev_nr_sectors(bdev)) {
+ __blkdev_put(whole, mode, 1);
+ bdput(whole);
+ return -ENXIO;
}
- bd_set_nr_sectors(bdev, bdev->bd_part->nr_sects);
set_init_blocksize(bdev);
}
if (bdev->bd_bdi == &noop_backing_dev_info)
bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
} else {
- if (bdev->bd_contains == bdev) {
- ret = 0;
+ if (!bdev_is_partition(bdev)) {
if (bdev->bd_disk->fops->open)
ret = bdev->bd_disk->fops->open(bdev, mode);
/* the same as first opener case, read comment there */
@@ -1545,101 +1331,145 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
(!ret || ret == -ENOMEDIUM))
bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
- goto out_unlock_bdev;
+ return ret;
}
}
bdev->bd_openers++;
- if (for_part)
- bdev->bd_part_count++;
- if (claiming)
- bd_finish_claiming(bdev, claiming, holder);
+ return 0;
+}
- /*
- * Block event polling for write claims if requested. Any write holder
- * makes the write_holder state stick until all are released. This is
- * good enough and tracking individual writeable reference is too
- * fragile given the way @mode is used in blkdev_get/put().
- */
- if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
- (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
- bdev->bd_write_holder = true;
- unblock_events = false;
- }
- mutex_unlock(&bdev->bd_mutex);
+struct block_device *blkdev_get_no_open(dev_t dev)
+{
+ struct block_device *bdev;
+ struct gendisk *disk;
- if (unblock_events)
- disk_unblock_events(disk);
+ down_read(&bdev_lookup_sem);
+ bdev = bdget(dev);
+ if (!bdev) {
+ up_read(&bdev_lookup_sem);
+ blk_request_module(dev);
+ down_read(&bdev_lookup_sem);
+
+ bdev = bdget(dev);
+ if (!bdev)
+ goto unlock;
+ }
- /* only one opener holds refs to the module and disk */
- if (!first_open)
- put_disk_and_module(disk);
- if (whole)
- bdput(whole);
- return 0;
+ disk = bdev->bd_disk;
+ if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
+ goto bdput;
+ if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
+ goto put_disk;
+ if (!try_module_get(bdev->bd_disk->fops->owner))
+ goto put_disk;
+ up_read(&bdev_lookup_sem);
+ return bdev;
+put_disk:
+ put_disk(disk);
+bdput:
+ bdput(bdev);
+unlock:
+ up_read(&bdev_lookup_sem);
+ return NULL;
+}
- out_clear:
- disk_put_part(bdev->bd_part);
- bdev->bd_disk = NULL;
- bdev->bd_part = NULL;
- if (bdev != bdev->bd_contains)
- __blkdev_put(bdev->bd_contains, mode, 1);
- bdev->bd_contains = NULL;
- out_unlock_bdev:
- if (claiming)
- bd_abort_claiming(bdev, claiming, holder);
- mutex_unlock(&bdev->bd_mutex);
- disk_unblock_events(disk);
- out_put_whole:
- if (whole)
- bdput(whole);
- out_put_disk:
- put_disk_and_module(disk);
- if (need_restart)
- goto restart;
- out:
- return ret;
+void blkdev_put_no_open(struct block_device *bdev)
+{
+ module_put(bdev->bd_disk->fops->owner);
+ put_disk(bdev->bd_disk);
+ bdput(bdev);
}
/**
- * blkdev_get - open a block device
- * @bdev: block_device to open
+ * blkdev_get_by_dev - open a block device by device number
+ * @dev: device number of block device to open
* @mode: FMODE_* mask
* @holder: exclusive holder identifier
*
- * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is
- * open with exclusive access. Specifying %FMODE_EXCL with %NULL
- * @holder is invalid. Exclusive opens may nest for the same @holder.
+ * Open the block device described by device number @dev. If @mode includes
+ * %FMODE_EXCL, the block device is opened with exclusive access. Specifying
+ * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for
+ * the same @holder.
*
- * On success, the reference count of @bdev is unchanged. On failure,
- * @bdev is put.
+ * Use this interface ONLY if you really do not have anything better - i.e. when
+ * you are behind a truly sucky interface and all you are given is a device
+ * number. Everything else should use blkdev_get_by_path().
*
* CONTEXT:
* Might sleep.
*
* RETURNS:
- * 0 on success, -errno on failure.
+ * Reference to the block_device on success, ERR_PTR(-errno) on failure.
*/
-static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
+struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
- int ret, perm = 0;
+ bool unblock_events = true;
+ struct block_device *bdev;
+ struct gendisk *disk;
+ int ret;
- if (mode & FMODE_READ)
- perm |= MAY_READ;
- if (mode & FMODE_WRITE)
- perm |= MAY_WRITE;
- ret = devcgroup_inode_permission(bdev->bd_inode, perm);
+ ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
+ MAJOR(dev), MINOR(dev),
+ ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
+ ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
if (ret)
- goto bdput;
+ return ERR_PTR(ret);
+
+ /*
+ * If we lost a race with 'disk' being deleted, try again. See md.c.
+ */
+retry:
+ bdev = blkdev_get_no_open(dev);
+ if (!bdev)
+ return ERR_PTR(-ENXIO);
+ disk = bdev->bd_disk;
+
+ if (mode & FMODE_EXCL) {
+ ret = bd_prepare_to_claim(bdev, holder);
+ if (ret)
+ goto put_blkdev;
+ }
+
+ disk_block_events(disk);
- ret =__blkdev_get(bdev, mode, holder, 0);
+ mutex_lock(&bdev->bd_mutex);
+ ret =__blkdev_get(bdev, mode);
if (ret)
- goto bdput;
- return 0;
+ goto abort_claiming;
+ if (mode & FMODE_EXCL) {
+ bd_finish_claiming(bdev, holder);
-bdput:
- bdput(bdev);
- return ret;
+ /*
+ * Block event polling for write claims if requested. Any write
+ * holder makes the write_holder state stick until all are
+ * released. This is good enough and tracking individual
+ * writeable reference is too fragile given the way @mode is
+ * used in blkdev_get/put().
+ */
+ if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
+ (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
+ bdev->bd_write_holder = true;
+ unblock_events = false;
+ }
+ }
+ mutex_unlock(&bdev->bd_mutex);
+
+ if (unblock_events)
+ disk_unblock_events(disk);
+ return bdev;
+
+abort_claiming:
+ if (mode & FMODE_EXCL)
+ bd_abort_claiming(bdev, holder);
+ mutex_unlock(&bdev->bd_mutex);
+ disk_unblock_events(disk);
+put_blkdev:
+ blkdev_put_no_open(bdev);
+ if (ret == -ERESTARTSYS)
+ goto retry;
+ return ERR_PTR(ret);
}
+EXPORT_SYMBOL(blkdev_get_by_dev);
/**
* blkdev_get_by_path - open a block device by name
@@ -1647,32 +1477,30 @@ bdput:
* @mode: FMODE_* mask
* @holder: exclusive holder identifier
*
- * Open the blockdevice described by the device file at @path. @mode
- * and @holder are identical to blkdev_get().
- *
- * On success, the returned block_device has reference count of one.
+ * Open the block device described by the device file at @path. If @mode
+ * includes %FMODE_EXCL, the block device is opened with exclusive access.
+ * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may
+ * nest for the same @holder.
*
* CONTEXT:
* Might sleep.
*
* RETURNS:
- * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+ * Reference to the block_device on success, ERR_PTR(-errno) on failure.
*/
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
void *holder)
{
struct block_device *bdev;
- int err;
-
- bdev = lookup_bdev(path);
- if (IS_ERR(bdev))
- return bdev;
+ dev_t dev;
+ int error;
- err = blkdev_get(bdev, mode, holder);
- if (err)
- return ERR_PTR(err);
+ error = lookup_bdev(path, &dev);
+ if (error)
+ return ERR_PTR(error);
- if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+ bdev = blkdev_get_by_dev(dev, mode, holder);
+ if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
blkdev_put(bdev, mode);
return ERR_PTR(-EACCES);
}
@@ -1681,45 +1509,6 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
}
EXPORT_SYMBOL(blkdev_get_by_path);
-/**
- * blkdev_get_by_dev - open a block device by device number
- * @dev: device number of block device to open
- * @mode: FMODE_* mask
- * @holder: exclusive holder identifier
- *
- * Open the blockdevice described by device number @dev. @mode and
- * @holder are identical to blkdev_get().
- *
- * Use it ONLY if you really do not have anything better - i.e. when
- * you are behind a truly sucky interface and all you are given is a
- * device number. _Never_ to be used for internal purposes. If you
- * ever need it - reconsider your API.
- *
- * On success, the returned block_device has reference count of one.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * Pointer to block_device on success, ERR_PTR(-errno) on failure.
- */
-struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
-{
- struct block_device *bdev;
- int err;
-
- bdev = bdget(dev);
- if (!bdev)
- return ERR_PTR(-ENOMEM);
-
- err = blkdev_get(bdev, mode, holder);
- if (err)
- return ERR_PTR(err);
-
- return bdev;
-}
-EXPORT_SYMBOL(blkdev_get_by_dev);
-
static int blkdev_open(struct inode * inode, struct file * filp)
{
struct block_device *bdev;
@@ -1741,14 +1530,12 @@ static int blkdev_open(struct inode * inode, struct file * filp)
if ((filp->f_flags & O_ACCMODE) == 3)
filp->f_mode |= FMODE_WRITE_IOCTL;
- bdev = bd_acquire(inode);
- if (bdev == NULL)
- return -ENOMEM;
-
+ bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
filp->f_mapping = bdev->bd_inode->i_mapping;
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
-
- return blkdev_get(bdev, filp->f_mode, filp);
+ return 0;
}
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
@@ -1774,34 +1561,28 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
WARN_ON_ONCE(bdev->bd_holders);
sync_blockdev(bdev);
kill_bdev(bdev);
-
bdev_write_inode(bdev);
+ if (bdev_is_partition(bdev))
+ victim = bdev_whole(bdev);
}
- if (bdev->bd_contains == bdev) {
- if (disk->fops->release)
- disk->fops->release(disk, mode);
- }
- if (!bdev->bd_openers) {
- disk_put_part(bdev->bd_part);
- bdev->bd_part = NULL;
- bdev->bd_disk = NULL;
- if (bdev != bdev->bd_contains)
- victim = bdev->bd_contains;
- bdev->bd_contains = NULL;
-
- put_disk_and_module(disk);
- }
+
+ if (!bdev_is_partition(bdev) && disk->fops->release)
+ disk->fops->release(disk, mode);
mutex_unlock(&bdev->bd_mutex);
- bdput(bdev);
- if (victim)
+ if (victim) {
__blkdev_put(victim, mode, 1);
+ bdput(victim);
+ }
}
void blkdev_put(struct block_device *bdev, fmode_t mode)
{
+ struct gendisk *disk = bdev->bd_disk;
+
mutex_lock(&bdev->bd_mutex);
if (mode & FMODE_EXCL) {
+ struct block_device *whole = bdev_whole(bdev);
bool bdev_free;
/*
@@ -1812,13 +1593,12 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
spin_lock(&bdev_lock);
WARN_ON_ONCE(--bdev->bd_holders < 0);
- WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
+ WARN_ON_ONCE(--whole->bd_holders < 0);
- /* bd_contains might point to self, check in a separate step */
if ((bdev_free = !bdev->bd_holders))
bdev->bd_holder = NULL;
- if (!bdev->bd_contains->bd_holders)
- bdev->bd_contains->bd_holder = NULL;
+ if (!whole->bd_holders)
+ whole->bd_holder = NULL;
spin_unlock(&bdev_lock);
@@ -1827,7 +1607,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
* unblock evpoll if it was a write holder.
*/
if (bdev_free && bdev->bd_write_holder) {
- disk_unblock_events(bdev->bd_disk);
+ disk_unblock_events(disk);
bdev->bd_write_holder = false;
}
}
@@ -1837,11 +1617,11 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
* event. This is to ensure detection of media removal commanded
* from userland - e.g. eject(1).
*/
- disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
-
+ disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
mutex_unlock(&bdev->bd_mutex);
__blkdev_put(bdev, mode, 0);
+ blkdev_put_no_open(bdev);
}
EXPORT_SYMBOL(blkdev_put);
@@ -2054,37 +1834,32 @@ const struct file_operations def_blk_fops = {
* namespace if possible and store its device number in @dev. Return 0 on
* success, or a negative error code otherwise.
*/
-struct block_device *lookup_bdev(const char *pathname)
+int lookup_bdev(const char *pathname, dev_t *dev)
{
- struct block_device *bdev;
struct inode *inode;
struct path path;
int error;
if (!pathname || !*pathname)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
error = kern_path(pathname, LOOKUP_FOLLOW, &path);
if (error)
- return ERR_PTR(error);
+ return error;
inode = d_backing_inode(path.dentry);
error = -ENOTBLK;
if (!S_ISBLK(inode->i_mode))
- goto fail;
+ goto out_path_put;
error = -EACCES;
if (!may_open_dev(&path))
- goto fail;
- error = -ENOMEM;
- bdev = bd_acquire(inode);
- if (!bdev)
- goto fail;
-out:
+ goto out_path_put;
+
+ *dev = inode->i_rdev;
+ error = 0;
+out_path_put:
path_put(&path);
- return bdev;
-fail:
- bdev = ERR_PTR(error);
- goto out;
+ return error;
}
EXPORT_SYMBOL(lookup_bdev);