/*
* fs/dax.c - Direct Access filesystem code
* Copyright (c) 2013-2014 Intel Corporation
* Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
* Author: Ross Zwisler <ross.zwisler@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/atomic.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/pagevec.h>
#include <linux/pmem.h>
#include <linux/sched.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
{
struct request_queue *q = bdev->bd_queue;
long rc = -EIO;
dax->addr = (void __pmem *) ERR_PTR(-EIO);
if (blk_queue_enter(q, true) != 0)
return rc;
rc = bdev_direct_access(bdev, dax);
if (rc < 0) {
dax->addr = (void __pmem *) ERR_PTR(rc);
blk_queue_exit(q);
return rc;
}
return rc;
}
static void dax_unmap_atomic(struct block_device *bdev,
const struct blk_dax_ctl *dax)
{
if (IS_ERR(dax->addr))
return;
blk_queue_exit(bdev->bd_queue);
}
/*
* dax_clear_blocks() is called from within transaction context from XFS,
* and hence this means the stack from this point must follow GFP_NOFS
* semantics for all operations.
*/
int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
{
struct block_device *bdev = inode->i_sb->s_bdev;
struct blk_dax_ctl dax = {
.sector = block << (inode->i_blkbits - 9),
.size = _size,
};
might_sleep();
do {
long count, sz;
count = dax_map_atomic(bdev, &dax);
if (count < 0)
return count;
sz = min_t(long, count, SZ_128K);
clear_pmem(dax.addr, sz);
dax.size -= sz;
dax.sector += sz / 512;
dax_unmap_atomic(bdev, &dax);
cond_resched();
} while (dax.size);
wmb_pmem();
return 0;
}
EXPORT_SYMBOL_GPL(dax_clear_blocks);
/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
loff_t pos, loff_t end)
{
loff_t final = end - pos + first; /* The final byte of the buffer */
if (first > 0)
clear_pmem(addr, first);
if (final < size)
clear_pmem(addr + final, size - final);
}
static bool buffer_written(struct buffer_head *bh)
{
return buffer_mapped(bh) && !buffer_unwritten(bh);
}
/*
* When ext4 encounters a hole, it returns without modifying the buffer_head
* which means that we can't trust b_size. To cope with this, we set b_state
* to 0 before calling get_block and, if any bit is set, we know we can trust
* b_size. Unfortunate, really, since ext4 knows precisely how long a hole is
* and would save us time calling get_block repeatedly.
*/
static bool buffer_size_valid(struct buffer_head *bh)
{
return bh->b_state != 0;
}
static sector_t to_sector(const struct buffer_head *bh,
const struct inode *