summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Documentation/ABI/removed/sysfs-bus-nfit 17
-rw-r--r-- Documentation/ABI/testing/sysfs-bus-nfit 19
-rw-r--r-- arch/x86/Kconfig.debug 3
-rw-r--r-- arch/x86/include/asm/mcsafe_test.h 75
-rw-r--r-- arch/x86/lib/memcpy_64.S 10
-rw-r--r-- drivers/acpi/nfit/core.c 11
-rw-r--r-- drivers/dax/super.c 33
-rw-r--r-- drivers/md/dm-linear.c 16
-rw-r--r-- drivers/md/dm-log-writes.c 15
-rw-r--r-- drivers/md/dm-stripe.c 21
-rw-r--r-- drivers/md/dm.c 25
-rw-r--r-- drivers/nvdimm/bus.c 19
-rw-r--r-- drivers/nvdimm/e820.c 41
-rw-r--r-- drivers/nvdimm/pfn_devs.c 2
-rw-r--r-- drivers/nvdimm/pmem.c 46
-rw-r--r-- drivers/nvdimm/region_devs.c 3
-rw-r--r-- drivers/s390/block/dcssblk.c 7
-rw-r--r-- fs/Kconfig 1
-rw-r--r-- fs/dax.c 136
-rw-r--r-- fs/xfs/xfs_file.c 72
-rw-r--r-- fs/xfs/xfs_inode.h 16
-rw-r--r-- fs/xfs/xfs_ioctl.c 8
-rw-r--r-- fs/xfs/xfs_iops.c 16
-rw-r--r-- fs/xfs/xfs_pnfs.c 15
-rw-r--r-- fs/xfs/xfs_pnfs.h 5
-rw-r--r-- include/linux/dax.h 12
-rw-r--r-- include/linux/device-mapper.h 5
-rw-r--r-- include/linux/memremap.h 36
-rw-r--r-- include/linux/mm.h 71
-rw-r--r-- include/linux/uio.h 2
-rw-r--r-- kernel/Makefile 3
-rw-r--r-- kernel/iomem.c 167
-rw-r--r-- kernel/memremap.c 210
-rw-r--r-- kernel/resource.c 1
-rw-r--r-- lib/Kconfig 3
-rw-r--r-- mm/Kconfig 5
-rw-r--r-- mm/gup.c 36
-rw-r--r-- mm/hmm.c 13
-rw-r--r-- mm/swap.c 3
-rw-r--r-- tools/testing/nvdimm/test/nfit.c 104
40 files changed, 924 insertions, 379 deletions
diff --git a/Documentation/ABI/removed/sysfs-bus-nfit b/Documentation/ABI/removed/sysfs-bus-nfit
new file mode 100644
index 000000000000..ae8c1ca53828
--- /dev/null
+++ b/Documentation/ABI/removed/sysfs-bus-nfit
@@ -0,0 +1,17 @@
+What: /sys/bus/nd/devices/regionX/nfit/ecc_unit_size
+Date: Aug, 2017
+KernelVersion: v4.14 (Removed v4.18)
+Contact: linux-nvdimm@lists.01.org
+Description:
+ (RO) Size of a write request to a DIMM that will not incur a
+ read-modify-write cycle at the memory controller.
+
+ When the nfit driver initializes it runs an ARS (Address Range
+ Scrub) operation across every pmem range. Part of that process
+ involves determining the ARS capabilities of a given address
+ range. One of the capabilities that is reported is the 'Clear
+ Uncorrectable Error Range Length Unit Size' (see: ACPI 6.2
+ section 9.20.7.4 Function Index 1 - Query ARS Capabilities).
+ This property indicates the boundary at which the NVDIMM may
+ need to perform read-modify-write cycles to maintain ECC (Error
+ Correcting Code) blocks.
diff --git a/Documentation/ABI/testing/sysfs-bus-nfit b/Documentation/ABI/testing/sysfs-bus-nfit
index 619eb8ca0f99..a1cb44dcb908 100644
--- a/Documentation/ABI/testing/sysfs-bus-nfit
+++ b/Documentation/ABI/testing/sysfs-bus-nfit
@@ -212,22 +212,3 @@ Description:
range. Used by NVDIMM Region Mapping Structure to uniquely refer
to this structure. Value of 0 is reserved and not used as an
index.
-
-
-What: /sys/bus/nd/devices/regionX/nfit/ecc_unit_size
-Date: Aug, 2017
-KernelVersion: v4.14
-Contact: linux-nvdimm@lists.01.org
-Description:
- (RO) Size of a write request to a DIMM that will not incur a
- read-modify-write cycle at the memory controller.
-
- When the nfit driver initializes it runs an ARS (Address Range
- Scrub) operation across every pmem range. Part of that process
- involves determining the ARS capabilities of a given address
- range. One of the capabilities that is reported is the 'Clear
- Uncorrectable Error Range Length Unit Size' (see: ACPI 6.2
- section 9.20.7.4 Function Index 1 - Query ARS Capabilities).
- This property indicates the boundary at which the NVDIMM may
- need to perform read-modify-write cycles to maintain ECC (Error
- Correcting Code) blocks.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 192e4d2f9efc..c6dd1d980081 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -72,6 +72,9 @@ config EARLY_PRINTK_USB_XDBC
You should normally say N here, unless you want to debug early
crashes or need a very simple printk logging facility.
+config MCSAFE_TEST
+ def_bool n
+
config X86_PTDUMP_CORE
def_bool n
diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h
new file mode 100644
index 000000000000..eb59804b6201
--- /dev/null
+++ b/arch/x86/include/asm/mcsafe_test.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MCSAFE_TEST_H_
+#define _MCSAFE_TEST_H_
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_MCSAFE_TEST
+extern unsigned long mcsafe_test_src;
+extern unsigned long mcsafe_test_dst;
+
+static inline void mcsafe_inject_src(void *addr)
+{
+ if (addr)
+ mcsafe_test_src = (unsigned long) addr;
+ else
+ mcsafe_test_src = ~0UL;
+}
+
+static inline void mcsafe_inject_dst(void *addr)
+{
+ if (addr)
+ mcsafe_test_dst = (unsigned long) addr;
+ else
+ mcsafe_test_dst = ~0UL;
+}
+#else /* CONFIG_MCSAFE_TEST */
+static inline void mcsafe_inject_src(void *addr)
+{
+}
+
+static inline void mcsafe_inject_dst(void *addr)
+{
+}
+#endif /* CONFIG_MCSAFE_TEST */
+
+#else /* __ASSEMBLY__ */
+#include <asm/export.h>
+
+#ifdef CONFIG_MCSAFE_TEST
+.macro MCSAFE_TEST_CTL
+ .pushsection .data
+ .align 8
+ .globl mcsafe_test_src
+ mcsafe_test_src:
+ .quad 0
+ EXPORT_SYMBOL_GPL(mcsafe_test_src)
+ .globl mcsafe_test_dst
+ mcsafe_test_dst:
+ .quad 0
+ EXPORT_SYMBOL_GPL(mcsafe_test_dst)
+ .popsection
+.endm
+
+.macro MCSAFE_TEST_SRC reg count target
+ leaq \count(\reg), %r9
+ cmp mcsafe_test_src, %r9
+ ja \target
+.endm
+
+.macro MCSAFE_TEST_DST reg count target
+ leaq \count(\reg), %r9
+ cmp mcsafe_test_dst, %r9
+ ja \target
+.endm
+#else
+.macro MCSAFE_TEST_CTL
+.endm
+
+.macro MCSAFE_TEST_SRC reg count target
+.endm
+
+.macro MCSAFE_TEST_DST reg count target
+.endm
+#endif /* CONFIG_MCSAFE_TEST */
+#endif /* __ASSEMBLY__ */
+#endif /* _MCSAFE_TEST_H_ */
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index c3b527a9f95d..298ef1479240 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -3,6 +3,7 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
+#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>
@@ -183,6 +184,9 @@ ENTRY(memcpy_orig)
ENDPROC(memcpy_orig)
#ifndef CONFIG_UML
+
+MCSAFE_TEST_CTL
+
/*
* __memcpy_mcsafe - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
@@ -206,6 +210,8 @@ ENTRY(__memcpy_mcsafe)
subl %ecx, %edx
.L_read_leading_bytes:
movb (%rsi), %al
+ MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
+ MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
movb %al, (%rdi)
incq %rsi
@@ -221,6 +227,8 @@ ENTRY(__memcpy_mcsafe)
.L_read_words:
movq (%rsi), %r8
+ MCSAFE_TEST_SRC %rsi 8 .E_read_words
+ MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
movq %r8, (%rdi)
addq $8, %rsi
@@ -237,6 +245,8 @@ ENTRY(__memcpy_mcsafe)
movl %edx, %ecx
.L_read_trailing_bytes:
movb (%rsi), %al
+ MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
+ MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
movb %al, (%rdi)
incq %rsi
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index e2235ed3e4be..b87252bf4571 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1978,19 +1978,8 @@ static ssize_t range_index_show(struct device *dev,
}
static DEVICE_ATTR_RO(range_index);
-static ssize_t ecc_unit_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nd_region *nd_region = to_nd_region(dev);
- struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
-
- return sprintf(buf, "%d\n", nfit_spa->clear_err_unit);
-}
-static DEVICE_ATTR_RO(ecc_unit_size);
-
static struct attribute *acpi_nfit_region_attributes[] = {
&dev_attr_range_index.attr,
- &dev_attr_ecc_unit_size.attr,
NULL,
};
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 1d7bd96511f0..903d9c473749 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -85,6 +85,7 @@ EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
{
struct dax_device *dax_dev;
+ bool dax_enabled = false;
pgoff_t pgoff;
int err, id;
void *kaddr;
@@ -134,14 +135,21 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
* on being able to do (page_address(pfn_to_page())).
*/
WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
+ dax_enabled = true;
} else if (pfn_t_devmap(pfn)) {
- /* pass */;
- } else {
+ struct dev_pagemap *pgmap;
+
+ pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
+ if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
+ dax_enabled = true;
+ put_dev_pagemap(pgmap);
+ }
+
+ if (!dax_enabled) {
pr_debug("%s: error: dax support not enabled\n",
bdevname(bdev, buf));
return false;
}
-
return true;
}
EXPORT_SYMBOL_GPL(__bdev_dax_supported);
@@ -182,8 +190,7 @@ static ssize_t write_cache_show(struct device *dev,
if (!dax_dev)
return -ENXIO;
- rc = sprintf(buf, "%d\n", !!test_bit(DAXDEV_WRITE_CACHE,
- &dax_dev->flags));
+ rc = sprintf(buf, "%d\n", !!dax_write_cache_enabled(dax_dev));
put_dax(dax_dev);
return rc;
}
@@ -201,10 +208,8 @@ static ssize_t write_cache_store(struct device *dev,
if (rc)
len = rc;
- else if (write_cache)
- set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
else
- clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
+ dax_write_cache(dax_dev, write_cache);
put_dax(dax_dev);
return len;
@@ -282,11 +287,21 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
}
EXPORT_SYMBOL_GPL(dax_copy_from_iter);
+size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
+ size_t bytes, struct iov_iter *i)
+{
+ if (!dax_alive(dax_dev))
+ return 0;
+
+ return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i);
+}
+EXPORT_SYMBOL_GPL(dax_copy_to_iter);
+
#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
{
- if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags)))
+ if (unlikely(!dax_write_cache_enabled(dax_dev)))
return;
arch_wb_cache_pmem(addr, size);
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 775c06d953b7..d10964d41fd7 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -185,9 +185,24 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
+static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
+{
+ struct linear_c *lc = ti->private;
+ struct block_device *bdev = lc->dev->bdev;
+ struct dax_device *dax_dev = lc->dev->dax_dev;
+ sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+
+ dev_sector = linear_map_sector(ti, sector);
+ if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
+ return 0;
+ return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
+}
+
#else
#define linear_dax_direct_access NULL
#define linear_dax_copy_from_iter NULL
+#define linear_dax_copy_to_iter NULL
#endif
static struct target_type linear_target = {
@@ -204,6 +219,7 @@ static struct target_type linear_target = {
.iterate_devices = linear_iterate_devices,
.direct_access = linear_dax_direct_access,
.dax_copy_from_iter = linear_dax_copy_from_iter,
+ .dax_copy_to_iter = linear_dax_copy_to_iter,
};
int __init dm_linear_init(void)
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index c90c7c08a77f..9ea2b0291f20 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -962,9 +962,23 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
dax_copy:
return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
}
+
+static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
+ pgoff_t pgoff, void *addr, size_t bytes,
+ struct iov_iter *i)
+{
+ struct log_writes_c *lc = ti->private;
+ sector_t sector = pgoff * PAGE_SECTORS;
+
+ if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
+ return 0;
+ return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
+}
+
#else
#define log_writes_dax_direct_access NULL
#define log_writes_dax_copy_from_iter NULL
+#define log_writes_dax_copy_to_iter NULL
#endif
static struct target_type log_writes_target = {
@@ -982,6 +996,7 @@ static struct target_type log_writes_target = {
.io_hints = log_writes_io_hints,
.direct_access = log_writes_dax_direct_access,
.dax_copy_from_iter = log_writes_dax_copy_from_iter,
+ .dax_copy_to_iter = log_writes_dax_copy_to_iter,
};
static int __init dm_log_writes_init(void)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index fe7fb9b1aec3..8547d7594338 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -354,9 +354,29 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
+static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
+{
+ sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+ struct stripe_c *sc = ti->private;
+ struct dax_device *dax_dev;
+ struct block_device *bdev;
+ uint32_t stripe;
+
+ stripe_map_sector(sc, sector, &stripe, &dev_sector);
+ dev_sector += sc->stripe[stripe].physical_start;
+ dax_dev = sc->stripe[stripe].dev->dax_dev;
+ bdev = sc->stripe[stripe].dev->bdev;
+
+ if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
+ return 0;
+ return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
+}
+
#else
#define stripe_dax_direct_access NULL
#define stripe_dax_copy_from_iter NULL
+#define stripe_dax_copy_to_iter NULL
#endif
/*
@@ -478,6 +498,7 @@ static struct target_type stripe_target = {
.io_hints = stripe_io_hints,
.direct_access = stripe_dax_direct_access,
.dax_copy_from_iter = stripe_dax_copy_from_iter,
+ .dax_copy_to_iter = stripe_dax_copy_to_iter,
};
int __init dm_stripe_init(void)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 20a8d63754bf..e65429a29c06 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1089,6 +1089,30 @@ static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
return ret;
}
+static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
+{
+ struct mapped_device *md = dax_get_private(dax_dev);
+ sector_t sector = pgoff * PAGE_SECTORS;
+ struct dm_target *ti;
+ long ret = 0;
+ int srcu_idx;
+
+ ti = dm_dax_get_live_target(md, sector, &srcu_idx);
+
+ if (!ti)
+ goto out;
+ if (!ti->type->dax_copy_to_iter) {
+ ret = copy_to_iter(addr, bytes, i);
+ goto out;
+ }
+ ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i);
+ out:
+ dm_put_live_table(md, srcu_idx);
+
+ return ret;
+}
+
/*
* A target may call dm_accept_partial_bio only from the map routine. It is
* allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET.
@@ -3137,6 +3161,7 @@ static const struct block_device_operations dm_blk_dops = {
static const struct dax_operations dm_dax_ops = {
.direct_access = dm_dax_direct_access,
.copy_from_iter = dm_dax_copy_from_iter,
+ .copy_to_iter = dm_dax_copy_to_iter,
};
/*
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index a64023690cad..27902a8799b1 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -100,6 +100,9 @@ static int nvdimm_bus_probe(struct device *dev)
if (!try_module_get(provider))
return -ENXIO;
+ dev_dbg(&nvdimm_bus->dev, "START: %s.probe(%s)\n",
+ dev->driver->name, dev_name(dev));
+
nvdimm_bus_probe_start(nvdimm_bus);
rc = nd_drv->probe(dev);
if (rc == 0)
@@ -108,7 +111,7 @@ static int nvdimm_bus_probe(struct device *dev)
nd_region_disable(nvdimm_bus, dev);
nvdimm_bus_probe_end(nvdimm_bus);
- dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
+ dev_dbg(&nvdimm_bus->dev, "END: %s.probe(%s) = %d\n", dev->driver->name,
dev_name(dev), rc);
if (rc != 0)
@@ -566,14 +569,18 @@ int nvdimm_revalidate_disk(struct gendisk *disk)
{
struct device *dev = disk_to_dev(disk)->parent;
struct nd_region *nd_region = to_nd_region(dev->parent);
- const char *pol = nd_region->ro ? "only" : "write";
+ int disk_ro = get_disk_ro(disk);
- if (nd_region->ro == get_disk_ro(disk))
+ /*
+ * Upgrade to read-only if the region is read-only; preserve as
+ * read-only if the disk is already read-only.
+ */
+ if (disk_ro || nd_region->ro == disk_ro)
return 0;
- dev_info(dev, "%s read-%s, marking %s read-%s\n",
- dev_name(&nd_region->dev), pol, disk->disk_name, pol);
- set_disk_ro(disk, nd_region->ro);
+ dev_info(dev, "%s read-only, marking %s read-only\n",
+ dev_name(&nd_region->dev), disk->disk_name);
+ set_disk_ro(disk, 1);
return 0;
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c
index 6f9a6ffd7cde..521eaf53a52a 100644
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c
@@ -38,12 +38,27 @@ static int e820_range_to_nid(resource_size_t addr)
}
#endif
+static int e820_register_one(struct resource *res, void *data)
+{
+ struct nd_region_desc ndr_desc;
+ struct nvdimm_bus *nvdimm_bus = data;
+
+ memset(&ndr_desc, 0, sizeof(ndr_desc));
+ ndr_desc.res = res;
+ ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
+ ndr_desc.numa_node = e820_range_to_nid(res->start);
+ set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+ if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
+ return -ENXIO;
+ return 0;
+}
+
static int e820_pmem_probe(struct platform_device *pdev)
{
static struct nvdimm_bus_descriptor nd_desc;
struct device *dev = &pdev->dev;
struct nvdimm_bus *nvdimm_bus;
- struct resource *p;
+ int rc = -ENXIO;
nd_desc.attr_groups = e820_pmem_attribute_groups;
nd_desc.provider_name = "e820";
@@ -53,27 +68,15 @@ static int e820_pmem_probe(struct platform_device *pdev)
goto err;
platform_set_drvdata(pdev, nvdimm_bus);
- for (p = iomem_resource.child; p ; p = p->sibling) {
- struct nd_region_desc ndr_desc;
-
- if (p->desc != IORES_DESC_PERSISTENT_MEMORY_LEGACY)
- continue;
-
- memset(&ndr_desc, 0, sizeof(ndr_desc));
- ndr_desc.res = p;
- ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
- ndr_desc.numa_node = e820_range_to_nid(p->start);
- set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
- if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
- goto err;
- }
-
+ rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY,
+ IORESOURCE_MEM, 0, -1, nvdimm_bus, e820_register_one);
+ if (rc)
+ goto err;
return 0;
-
- err:
+err:
nvdimm_bus_unregister(nvdimm_bus);
dev_err(dev, "failed to register legacy persistent memory ranges\n");
- return -ENXIO;
+ return rc;
}
static struct platform_driver e820_pmem_driver = {
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 30b08791597d..3f7ad5bc443e 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -561,8 +561,6 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
res->start += start_pad;
res->end -= end_trunc;
- pgmap->type = MEMORY_DEVICE_HOST;
-
if (nd_pfn->mode == PFN_MODE_RAM) {
if (offset < SZ_8K)
return -EINVAL;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index e023d6aa22b5..68940356cad3 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -164,11 +164,6 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
return rc;
}
-/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */
-#ifndef REQ_FLUSH
-#define REQ_FLUSH REQ_PREFLUSH
-#endif
-
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
blk_status_t rc = 0;
@@ -179,7 +174,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
struct pmem_device *pmem = q->queuedata;
struct nd_region *nd_region = to_region(pmem);
- if (bio->bi_opf & REQ_FLUSH)
+ if (bio->bi_opf & REQ_PREFLUSH)
nvdimm_flush(nd_region);
do_acct = nd_iostat_start(bio, &start);
@@ -264,9 +259,16 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
return copy_from_iter_flushcache(addr, bytes, i);
}
+static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
+{
+ return copy_to_iter_mcsafe(addr, bytes, i);
+}
+
static const struct dax_operations pmem_dax_ops = {
.direct_access = pmem_dax_direct_access,
.copy_from_iter = pmem_copy_from_iter,
+ .copy_to_iter = pmem_copy_to_iter,
};
static const struct attribute_group *pmem_attribute_groups[] = {
@@ -294,12 +296,33 @@ static void pmem_release_disk(void *__pmem)
put_disk(pmem->disk);
}
+static void pmem_release_pgmap_ops(void *__pgmap)
+{
+ dev_pagemap_put_ops();
+}
+
+static void fsdax_pagefree(struct page *page, void *data)
+{
+ wake_up_var(&page->_refcount);
+}
+
+static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap)
+{
+ dev_pagemap_get_ops();
+ if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap))
+ return -ENOMEM;
+ pgmap->type = MEMORY_DEVICE_FS_DAX;
+ pgmap->page_free = fsdax_pagefree;
+
+ return 0;
+}
+
static int pmem_attach_disk(struct device *dev,
struct nd_namespace_common *ndns)
{
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
struct nd_region *nd_region = to_nd_region(dev->parent);
- int nid = dev_to_node(dev), fua, wbc;
+ int nid = dev_to_node(dev), fua;
struct resource *res = &nsio->res;
struct resource bb_res;
struct nd_pfn *nd_pfn = NULL;
@@ -335,7 +358,6 @@ static int pmem_attach_disk(struct device *dev,
dev_warn(dev, "unable to guarantee persistence of writes\n");
fua = 0;
}
- wbc = nvdimm_has_cache(nd_region);
if (!devm_request_mem_region(dev, res->start, resource_size(res),
dev_name(&ndns->dev))) {
@@ -353,6 +375,8 @@ static int pmem_attach_disk(struct device *dev,
pmem->pfn_flags = PFN_DEV;
pmem->pgmap.ref = &q->q_usage_counter;
if (is_nd_pfn(dev)) {
+ if (setup_pagemap_fsdax(dev, &pmem->pgmap))
+ return -ENOMEM;
addr = devm_memremap_pages(dev, &pmem->pgmap);
pfn_sb = nd_pfn->pfn_sb;
pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
@@ -364,6 +388,8 @@ static int pmem_attach_disk(struct device *dev,
} else if (pmem_should_map_pages(dev)) {
memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
pmem->pgmap.altmap_valid = false;
+ if (setup_pagemap_fsdax(dev, &pmem->pgmap))
+ return -ENOMEM;
addr = devm_memremap_pages(dev, &pmem->pgmap);
pmem->pfn_flags |= PFN_MAP;
memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
@@ -382,7 +408,7 @@ static int pmem_attach_disk(struct device *dev,
return PTR_ERR(addr);
pmem->virt_addr = addr;
- blk_queue_write_cache(q, wbc, fua);
+ blk_queue_write_cache(q, true, fua);
blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_logical_block_size(q, pmem_sector_size(ndns));
@@ -413,7 +439,7 @@ static int pmem_attach_disk(struct device *dev,
put_disk(disk);
return -ENOMEM;
}
- dax_write_cache(dax_dev, wbc);
+ dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
pmem->dax_dev = dax_dev;
gendev = disk_to_dev(disk);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index a612be6f019d..ec3543b83330 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1132,7 +1132,8 @@ EXPORT_SYMBOL_GPL(nvdimm_has_flush);
int nvdimm_has_cache(struct nd_region *nd_region)
{
- return is_nd_pmem(&nd_region->dev);
+ return is_nd_pmem(&nd_region->dev) &&
+ !test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags);
}
EXPORT_SYMBOL_GPL(nvdimm_has_cache);
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 0a312e450207..29024492b8ed 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -51,9 +51,16 @@ static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev,
return copy_from_iter(addr, bytes, i);
}
+static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev,
+ pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
+{
+ return copy_to_iter(addr, bytes, i);
+}
+
static const struct dax_operations dcssblk_dax_ops = {
.direct_access = dcssblk_dax_direct_access,
.copy_from_iter = dcssblk_dax_copy_from_iter,
+ .copy_to_iter = dcssblk_dax_copy_to_iter,
};
struct dcssblk_dev_info {
diff --git a/fs/Kconfig b/fs/Kconfig
index 40cdae75e3b4..ab2d96d1abee 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -38,6 +38,7 @@ config FS_DAX
bool "Direct Access (DAX) support"
depends on MMU
depends on !(ARM || MIPS || SPARC)
+ select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
select FS_IOMAP
select DAX
help
diff --git a/fs/dax.c b/fs/dax.c
index 08656a2f2aa6..641192808bb6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -351,6 +351,19 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
}
}
+static struct page *dax_busy_page(