diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-17 09:51:57 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-17 09:51:57 -0800 |
commit | a3841f94c7ecb3ede0f888d3fcfe8fb6368ddd7a (patch) | |
tree | 6625eedf10d0672068ee218bb893a5a0e1803df2 /drivers | |
parent | adeba81ac2a6451f44545874da3d181081f0ab04 (diff) | |
parent | 4247f24c23589bcc3bc3490515ef8c9497e9ae55 (diff) |
Merge tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm and dax updates from Dan Williams:
"Save for a few late fixes, all of these commits have shipped in -next
releases since before the merge window opened, and 0day has given a
build success notification.
The ext4 touches came from Jan, and the xfs touches have Darrick's
reviewed-by. An xfstest for the MAP_SYNC feature has been through
a few rounds of reviews and is on track to be merged.
- Introduce MAP_SYNC and MAP_SHARED_VALIDATE, a mechanism to enable
'userspace flush' of persistent memory updates via filesystem-dax
mappings. It arranges for any filesystem metadata updates that may
be required to satisfy a write fault to also be flushed ("on disk")
before the kernel returns to userspace from the fault handler.
Effectively every write-fault that dirties metadata completes an
fsync() before returning from the fault handler. The new
MAP_SHARED_VALIDATE mapping type guarantees that the MAP_SYNC flag
is validated as supported by the filesystem's ->mmap() file
operation.
- Add support for the standard ACPI 6.2 label access methods that
replace the NVDIMM_FAMILY_INTEL (vendor specific) label methods.
This enables interoperability with environments that only implement
the standardized methods.
- Add support for the ACPI 6.2 NVDIMM media error injection methods.
- Add support for the NVDIMM_FAMILY_INTEL v1.6 DIMM commands for
latch last shutdown status, firmware update, SMART error injection,
and SMART alarm threshold control.
- Clean up physical address information disclosures to be root-only.
- Fix revalidation of the DIMM "locked label area" status to support
dynamic unlock of the label area.
- Expand unit test infrastructure to mock the ACPI 6.2 Translate SPA
(system-physical-address) command and error injection commands.
Acknowledgements that came after the commits were pushed to -next:
- 957ac8c421ad ("dax: fix PMD faults on zero-length files"):
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
- a39e596baa07 ("xfs: support for synchronous DAX faults") and
7b565c9f965b ("xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()")
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>"
* tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (49 commits)
acpi, nfit: add 'Enable Latch System Shutdown Status' command support
dax: fix general protection fault in dax_alloc_inode
dax: fix PMD faults on zero-length files
dax: stop requiring a live device for dax_flush()
brd: remove dax support
dax: quiet bdev_dax_supported()
fs, dax: unify IOMAP_F_DIRTY read vs write handling policy in the dax core
tools/testing/nvdimm: unit test clear-error commands
acpi, nfit: validate commands against the device type
tools/testing/nvdimm: stricter bounds checking for error injection commands
xfs: support for synchronous DAX faults
xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()
ext4: Support for synchronous DAX faults
ext4: Simplify error handling in ext4_dax_huge_fault()
dax: Implement dax_finish_sync_fault()
dax, iomap: Add support for synchronous faults
mm: Define MAP_SYNC and VM_SYNC flags
dax: Allow tuning whether dax_insert_mapping_entry() dirties entry
dax: Allow dax_iomap_fault() to return pfn
dax: Fix comment describing dax_iomap_fault()
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/acpi/nfit/core.c | 274 | ||||
-rw-r--r-- | drivers/acpi/nfit/mce.c | 2 | ||||
-rw-r--r-- | drivers/acpi/nfit/nfit.h | 37 | ||||
-rw-r--r-- | drivers/block/Kconfig | 12 | ||||
-rw-r--r-- | drivers/block/brd.c | 65 | ||||
-rw-r--r-- | drivers/dax/device.c | 3 | ||||
-rw-r--r-- | drivers/dax/super.c | 14 | ||||
-rw-r--r-- | drivers/nvdimm/Makefile | 1 | ||||
-rw-r--r-- | drivers/nvdimm/badrange.c | 293 | ||||
-rw-r--r-- | drivers/nvdimm/bus.c | 24 | ||||
-rw-r--r-- | drivers/nvdimm/core.c | 260 | ||||
-rw-r--r-- | drivers/nvdimm/dimm.c | 3 | ||||
-rw-r--r-- | drivers/nvdimm/dimm_devs.c | 19 | ||||
-rw-r--r-- | drivers/nvdimm/label.c | 2 | ||||
-rw-r--r-- | drivers/nvdimm/namespace_devs.c | 6 | ||||
-rw-r--r-- | drivers/nvdimm/nd-core.h | 3 | ||||
-rw-r--r-- | drivers/nvdimm/nd.h | 7 | ||||
-rw-r--r-- | drivers/nvdimm/pfn_devs.c | 8 | ||||
-rw-r--r-- | drivers/nvdimm/region_devs.c | 8 |
19 files changed, 659 insertions, 382 deletions
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 9c2c49b6a240..ff2580e7611d 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -183,13 +183,33 @@ static int xlat_bus_status(void *buf, unsigned int cmd, u32 status) return 0; } -static int xlat_nvdimm_status(void *buf, unsigned int cmd, u32 status) +#define ACPI_LABELS_LOCKED 3 + +static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd, + u32 status) { + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + switch (cmd) { case ND_CMD_GET_CONFIG_SIZE: + /* + * In the _LSI, _LSR, _LSW case the locked status is + * communicated via the read/write commands + */ + if (nfit_mem->has_lsi) + break; + if (status >> 16 & ND_CONFIG_LOCKED) return -EACCES; break; + case ND_CMD_GET_CONFIG_DATA: + if (nfit_mem->has_lsr && status == ACPI_LABELS_LOCKED) + return -EACCES; + break; + case ND_CMD_SET_CONFIG_DATA: + if (nfit_mem->has_lsw && status == ACPI_LABELS_LOCKED) + return -EACCES; + break; default: break; } @@ -205,13 +225,182 @@ static int xlat_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd, { if (!nvdimm) return xlat_bus_status(buf, cmd, status); - return xlat_nvdimm_status(buf, cmd, status); + return xlat_nvdimm_status(nvdimm, buf, cmd, status); +} + +/* convert _LS{I,R} packages to the buffer object acpi_nfit_ctl expects */ +static union acpi_object *pkg_to_buf(union acpi_object *pkg) +{ + int i; + void *dst; + size_t size = 0; + union acpi_object *buf = NULL; + + if (pkg->type != ACPI_TYPE_PACKAGE) { + WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n", + pkg->type); + goto err; + } + + for (i = 0; i < pkg->package.count; i++) { + union acpi_object *obj = &pkg->package.elements[i]; + + if (obj->type == ACPI_TYPE_INTEGER) + size += 4; + else if (obj->type == ACPI_TYPE_BUFFER) + size += obj->buffer.length; + else { + WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n", + obj->type); + goto err; + } + } + + buf = 
ACPI_ALLOCATE(sizeof(*buf) + size); + if (!buf) + goto err; + + dst = buf + 1; + buf->type = ACPI_TYPE_BUFFER; + buf->buffer.length = size; + buf->buffer.pointer = dst; + for (i = 0; i < pkg->package.count; i++) { + union acpi_object *obj = &pkg->package.elements[i]; + + if (obj->type == ACPI_TYPE_INTEGER) { + memcpy(dst, &obj->integer.value, 4); + dst += 4; + } else if (obj->type == ACPI_TYPE_BUFFER) { + memcpy(dst, obj->buffer.pointer, obj->buffer.length); + dst += obj->buffer.length; + } + } +err: + ACPI_FREE(pkg); + return buf; +} + +static union acpi_object *int_to_buf(union acpi_object *integer) +{ + union acpi_object *buf = ACPI_ALLOCATE(sizeof(*buf) + 4); + void *dst = NULL; + + if (!buf) + goto err; + + if (integer->type != ACPI_TYPE_INTEGER) { + WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n", + integer->type); + goto err; + } + + dst = buf + 1; + buf->type = ACPI_TYPE_BUFFER; + buf->buffer.length = 4; + buf->buffer.pointer = dst; + memcpy(dst, &integer->integer.value, 4); +err: + ACPI_FREE(integer); + return buf; +} + +static union acpi_object *acpi_label_write(acpi_handle handle, u32 offset, + u32 len, void *data) +{ + acpi_status rc; + struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_object_list input = { + .count = 3, + .pointer = (union acpi_object []) { + [0] = { + .integer.type = ACPI_TYPE_INTEGER, + .integer.value = offset, + }, + [1] = { + .integer.type = ACPI_TYPE_INTEGER, + .integer.value = len, + }, + [2] = { + .buffer.type = ACPI_TYPE_BUFFER, + .buffer.pointer = data, + .buffer.length = len, + }, + }, + }; + + rc = acpi_evaluate_object(handle, "_LSW", &input, &buf); + if (ACPI_FAILURE(rc)) + return NULL; + return int_to_buf(buf.pointer); +} + +static union acpi_object *acpi_label_read(acpi_handle handle, u32 offset, + u32 len) +{ + acpi_status rc; + struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_object_list input = { + .count = 2, + .pointer = (union acpi_object []) { + [0] = { + 
.integer.type = ACPI_TYPE_INTEGER, + .integer.value = offset, + }, + [1] = { + .integer.type = ACPI_TYPE_INTEGER, + .integer.value = len, + }, + }, + }; + + rc = acpi_evaluate_object(handle, "_LSR", &input, &buf); + if (ACPI_FAILURE(rc)) + return NULL; + return pkg_to_buf(buf.pointer); +} + +static union acpi_object *acpi_label_info(acpi_handle handle) +{ + acpi_status rc; + struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; + + rc = acpi_evaluate_object(handle, "_LSI", NULL, &buf); + if (ACPI_FAILURE(rc)) + return NULL; + return pkg_to_buf(buf.pointer); +} + +static u8 nfit_dsm_revid(unsigned family, unsigned func) +{ + static const u8 revid_table[NVDIMM_FAMILY_MAX+1][32] = { + [NVDIMM_FAMILY_INTEL] = { + [NVDIMM_INTEL_GET_MODES] = 2, + [NVDIMM_INTEL_GET_FWINFO] = 2, + [NVDIMM_INTEL_START_FWUPDATE] = 2, + [NVDIMM_INTEL_SEND_FWUPDATE] = 2, + [NVDIMM_INTEL_FINISH_FWUPDATE] = 2, + [NVDIMM_INTEL_QUERY_FWUPDATE] = 2, + [NVDIMM_INTEL_SET_THRESHOLD] = 2, + [NVDIMM_INTEL_INJECT_ERROR] = 2, + }, + }; + u8 id; + + if (family > NVDIMM_FAMILY_MAX) + return 0; + if (func > 31) + return 0; + id = revid_table[family][func]; + if (id == 0) + return 1; /* default */ + return id; } int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); union acpi_object in_obj, in_buf, *out_obj; const struct nd_cmd_desc *desc = NULL; struct device *dev = acpi_desc->dev; @@ -235,7 +424,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, } if (nvdimm) { - struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); struct acpi_device *adev = nfit_mem->adev; if (!adev) @@ -294,7 +482,29 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, in_buf.buffer.pointer, min_t(u32, 256, in_buf.buffer.length), true); - out_obj = 
acpi_evaluate_dsm(handle, guid, 1, func, &in_obj); + /* call the BIOS, prefer the named methods over _DSM if available */ + if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsi) + out_obj = acpi_label_info(handle); + else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && nfit_mem->has_lsr) { + struct nd_cmd_get_config_data_hdr *p = buf; + + out_obj = acpi_label_read(handle, p->in_offset, p->in_length); + } else if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA + && nfit_mem->has_lsw) { + struct nd_cmd_set_config_hdr *p = buf; + + out_obj = acpi_label_write(handle, p->in_offset, p->in_length, + p->in_buf); + } else { + u8 revid; + + if (nvdimm) + revid = nfit_dsm_revid(nfit_mem->family, func); + else + revid = 1; + out_obj = acpi_evaluate_dsm(handle, guid, revid, func, &in_obj); + } + if (!out_obj) { dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name, cmd_name); @@ -356,8 +566,10 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, * Set fw_status for all the commands with a known format to be * later interpreted by xlat_status(). */ - if (i >= 1 && ((cmd >= ND_CMD_ARS_CAP && cmd <= ND_CMD_CLEAR_ERROR) - || (cmd >= ND_CMD_SMART && cmd <= ND_CMD_VENDOR))) + if (i >= 1 && ((!nvdimm && cmd >= ND_CMD_ARS_CAP + && cmd <= ND_CMD_CLEAR_ERROR) + || (nvdimm && cmd >= ND_CMD_SMART + && cmd <= ND_CMD_VENDOR))) fw_status = *(u32 *) out_obj->buffer.pointer; if (offset + in_buf.buffer.length < buf_len) { @@ -1431,6 +1643,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, { struct acpi_device *adev, *adev_dimm; struct device *dev = acpi_desc->dev; + union acpi_object *obj; unsigned long dsm_mask; const guid_t *guid; int i; @@ -1463,7 +1676,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, * different command sets. Note, that checking for function0 (bit0) * tells us if any commands are reachable through this GUID. 
*/ - for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++) + for (i = 0; i <= NVDIMM_FAMILY_MAX; i++) if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) if (family < 0 || i == default_dsm_family) family = i; @@ -1473,7 +1686,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, if (override_dsm_mask && !disable_vendor_specific) dsm_mask = override_dsm_mask; else if (nfit_mem->family == NVDIMM_FAMILY_INTEL) { - dsm_mask = 0x3fe; + dsm_mask = NVDIMM_INTEL_CMDMASK; if (disable_vendor_specific) dsm_mask &= ~(1 << ND_CMD_VENDOR); } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) { @@ -1493,9 +1706,32 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, guid = to_nfit_uuid(nfit_mem->family); for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) - if (acpi_check_dsm(adev_dimm->handle, guid, 1, 1ULL << i)) + if (acpi_check_dsm(adev_dimm->handle, guid, + nfit_dsm_revid(nfit_mem->family, i), + 1ULL << i)) set_bit(i, &nfit_mem->dsm_mask); + obj = acpi_label_info(adev_dimm->handle); + if (obj) { + ACPI_FREE(obj); + nfit_mem->has_lsi = 1; + dev_dbg(dev, "%s: has _LSI\n", dev_name(&adev_dimm->dev)); + } + + obj = acpi_label_read(adev_dimm->handle, 0, 0); + if (obj) { + ACPI_FREE(obj); + nfit_mem->has_lsr = 1; + dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev)); + } + + obj = acpi_label_write(adev_dimm->handle, 0, 0, NULL); + if (obj) { + ACPI_FREE(obj); + nfit_mem->has_lsw = 1; + dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev)); + } + return 0; } @@ -1571,8 +1807,21 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) * userspace interface. */ cmd_mask = 1UL << ND_CMD_CALL; - if (nfit_mem->family == NVDIMM_FAMILY_INTEL) - cmd_mask |= nfit_mem->dsm_mask; + if (nfit_mem->family == NVDIMM_FAMILY_INTEL) { + /* + * These commands have a 1:1 correspondence + * between DSM payload and libnvdimm ioctl + * payload format. 
+ */ + cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK; + } + + if (nfit_mem->has_lsi) + set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); + if (nfit_mem->has_lsr) + set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); + if (nfit_mem->has_lsw) + set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask); flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush : NULL; @@ -1645,6 +1894,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) int i; nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; + nd_desc->bus_dsm_mask = acpi_desc->bus_nfit_cmd_force_en; adev = to_acpi_dev(acpi_desc); if (!adev) return; @@ -2239,7 +2489,7 @@ static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc, if (ars_status->out_length < 44 + sizeof(struct nd_ars_record) * (i + 1)) break; - rc = nvdimm_bus_add_poison(nvdimm_bus, + rc = nvdimm_bus_add_badrange(nvdimm_bus, ars_status->records[i].err_address, ars_status->records[i].length); if (rc) diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index feeb95d574fa..b92921439657 100644 --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -67,7 +67,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, continue; /* If this fails due to an -ENOMEM, there is little we can do */ - nvdimm_bus_add_poison(acpi_desc->nvdimm_bus, + nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus, ALIGN(mce->addr, L1_CACHE_BYTES), L1_CACHE_BYTES); nvdimm_region_notify(nfit_spa->nd_region, diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 54292db61262..f0cf18b2da8b 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -24,7 +24,7 @@ /* ACPI 6.1 */ #define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba" -/* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf */ +/* http://pmem.io/documents/NVDIMM_DSM_Interface-V1.6.pdf */ #define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66" /* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */ @@ -38,6 +38,37 
@@ | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \ | ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED) +#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_MSFT + +#define NVDIMM_STANDARD_CMDMASK \ +(1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \ + | 1 << ND_CMD_GET_CONFIG_SIZE | 1 << ND_CMD_GET_CONFIG_DATA \ + | 1 << ND_CMD_SET_CONFIG_DATA | 1 << ND_CMD_VENDOR_EFFECT_LOG_SIZE \ + | 1 << ND_CMD_VENDOR_EFFECT_LOG | 1 << ND_CMD_VENDOR) + +/* + * Command numbers that the kernel needs to know about to handle + * non-default DSM revision ids + */ +enum nvdimm_family_cmds { + NVDIMM_INTEL_LATCH_SHUTDOWN = 10, + NVDIMM_INTEL_GET_MODES = 11, + NVDIMM_INTEL_GET_FWINFO = 12, + NVDIMM_INTEL_START_FWUPDATE = 13, + NVDIMM_INTEL_SEND_FWUPDATE = 14, + NVDIMM_INTEL_FINISH_FWUPDATE = 15, + NVDIMM_INTEL_QUERY_FWUPDATE = 16, + NVDIMM_INTEL_SET_THRESHOLD = 17, + NVDIMM_INTEL_INJECT_ERROR = 18, +}; + +#define NVDIMM_INTEL_CMDMASK \ +(NVDIMM_STANDARD_CMDMASK | 1 << NVDIMM_INTEL_GET_MODES \ + | 1 << NVDIMM_INTEL_GET_FWINFO | 1 << NVDIMM_INTEL_START_FWUPDATE \ + | 1 << NVDIMM_INTEL_SEND_FWUPDATE | 1 << NVDIMM_INTEL_FINISH_FWUPDATE \ + | 1 << NVDIMM_INTEL_QUERY_FWUPDATE | 1 << NVDIMM_INTEL_SET_THRESHOLD \ + | 1 << NVDIMM_INTEL_INJECT_ERROR | 1 << NVDIMM_INTEL_LATCH_SHUTDOWN) + enum nfit_uuids { /* for simplicity alias the uuid index with the family id */ NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL, @@ -140,6 +171,9 @@ struct nfit_mem { struct resource *flush_wpq; unsigned long dsm_mask; int family; + u32 has_lsi:1; + u32 has_lsr:1; + u32 has_lsw:1; }; struct acpi_nfit_desc { @@ -167,6 +201,7 @@ struct acpi_nfit_desc { unsigned int init_complete:1; unsigned long dimm_cmd_force_en; unsigned long bus_cmd_force_en; + unsigned long bus_nfit_cmd_force_en; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, void *iobuf, u64 len, int rw); }; diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 923b417eaf4c..40579d0cb3d1 100644 --- 
a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -302,7 +302,6 @@ config BLK_DEV_SX8 config BLK_DEV_RAM tristate "RAM block device support" - select DAX if BLK_DEV_RAM_DAX ---help--- Saying Y here will allow you to use a portion of your RAM memory as a block device, so that you can make file systems on it, read and @@ -338,17 +337,6 @@ config BLK_DEV_RAM_SIZE The default value is 4096 kilobytes. Only change this if you know what you are doing. -config BLK_DEV_RAM_DAX - bool "Support Direct Access (DAX) to RAM block devices" - depends on BLK_DEV_RAM && FS_DAX - default n - help - Support filesystems using DAX to access RAM block devices. This - avoids double-buffering data in the page cache before copying it - to the block device. Answering Y will slightly enlarge the kernel, - and will prevent RAM block device backing store memory from being - allocated from highmem (only a problem for highmem systems). - config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media (DEPRECATED)" depends on !UML diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 588360d79fca..8028a3a7e7fd 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -21,11 +21,6 @@ #include <linux/fs.h> #include <linux/slab.h> #include <linux/backing-dev.h> -#ifdef CONFIG_BLK_DEV_RAM_DAX -#include <linux/pfn_t.h> -#include <linux/dax.h> -#include <linux/uio.h> -#endif #include <linux/uaccess.h> @@ -45,9 +40,6 @@ struct brd_device { struct request_queue *brd_queue; struct gendisk *brd_disk; -#ifdef CONFIG_BLK_DEV_RAM_DAX - struct dax_device *dax_dev; -#endif struct list_head brd_list; /* @@ -112,9 +104,6 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) * restriction might be able to be lifted. 
*/ gfp_flags = GFP_NOIO | __GFP_ZERO; -#ifndef CONFIG_BLK_DEV_RAM_DAX - gfp_flags |= __GFP_HIGHMEM; -#endif page = alloc_page(gfp_flags); if (!page) return NULL; @@ -334,43 +323,6 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, return err; } -#ifdef CONFIG_BLK_DEV_RAM_DAX -static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) -{ - struct page *page; - - if (!brd) - return -ENODEV; - page = brd_insert_page(brd, (sector_t)pgoff << PAGE_SECTORS_SHIFT); - if (!page) - return -ENOSPC; - *kaddr = page_address(page); - *pfn = page_to_pfn_t(page); - - return 1; -} - -static long brd_dax_direct_access(struct dax_device *dax_dev, - pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) -{ - struct brd_device *brd = dax_get_private(dax_dev); - - return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn); -} - -static size_t brd_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) -{ - return copy_from_iter(addr, bytes, i); -} - -static const struct dax_operations brd_dax_ops = { - .direct_access = brd_dax_direct_access, - .copy_from_iter = brd_dax_copy_from_iter, -}; -#endif - static const struct block_device_operations brd_fops = { .owner = THIS_MODULE, .rw_page = brd_rw_page, @@ -451,21 +403,8 @@ static struct brd_device *brd_alloc(int i) set_capacity(disk, rd_size * 2); disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; -#ifdef CONFIG_BLK_DEV_RAM_DAX - queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue); - brd->dax_dev = alloc_dax(brd, disk->disk_name, &brd_dax_ops); - if (!brd->dax_dev) - goto out_free_inode; -#endif - - return brd; -#ifdef CONFIG_BLK_DEV_RAM_DAX -out_free_inode: - kill_dax(brd->dax_dev); - put_dax(brd->dax_dev); -#endif out_free_queue: blk_cleanup_queue(brd->brd_queue); out_free_dev: @@ -505,10 +444,6 @@ out: static void brd_del_one(struct brd_device *brd) { list_del(&brd->brd_list); -#ifdef 
CONFIG_BLK_DEV_RAM_DAX - kill_dax(brd->dax_dev); - put_dax(brd->dax_dev); -#endif del_gendisk(brd->brd_disk); brd_free(brd); } diff --git a/drivers/dax/device.c b/drivers/dax/device.c index e9f3b3e4bbf4..6833ada237ab 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -222,7 +222,8 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, unsigned long size) { struct resource *res; - phys_addr_t phys; + /* gcc-4.6.3-nolibc for i386 complains that this is uninitialized */ + phys_addr_t uninitialized_var(phys); int i; for (i = 0; i < dev_dax->num_resources; i++) { diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 557b93703532..3ec804672601 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -92,21 +92,21 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) long len; if (blocksize != PAGE_SIZE) { - pr_err("VFS (%s): error: unsupported blocksize for dax\n", + pr_debug("VFS (%s): error: unsupported blocksize for dax\n", sb->s_id); return -EINVAL; } err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff); if (err) { - pr_err("VFS (%s): error: unaligned partition for dax\n", + pr_debug("VFS (%s): error: unaligned partition for dax\n", sb->s_id); return err; } dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); if (!dax_dev) { - pr_err("VFS (%s): error: device does not support dax\n", + pr_debug("VFS (%s): error: device does not support dax\n", sb->s_id); return -EOPNOTSUPP; } @@ -118,7 +118,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) put_dax(dax_dev); if (len < 1) { - pr_err("VFS (%s): error: dax access failed (%ld)", + pr_debug("VFS (%s): error: dax access failed (%ld)\n", sb->s_id, len); return len < 0 ? 
len : -EIO; } @@ -273,9 +273,6 @@ EXPORT_SYMBOL_GPL(dax_copy_from_iter); void arch_wb_cache_pmem(void *addr, size_t size); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) { - if (unlikely(!dax_alive(dax_dev))) - return; - if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags))) return; @@ -344,6 +341,9 @@ static struct inode *dax_alloc_inode(struct super_block *sb) struct inode *inode; dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL); + if (!dax_dev) + return NULL; + inode = &dax_dev->inode; inode->i_rdev = 0; return inode; diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 447e0e14f3b6..70d5f3ad9909 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -21,6 +21,7 @@ libnvdimm-y += region_devs.o libnvdimm-y += region.o libnvdimm-y += namespace_devs.o libnvdimm-y += label.o +libnvdimm-y += badrange.o libnvdimm-$(CONFIG_ND_CLAIM) += claim.o libnvdimm-$(CONFIG_BTT) += btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o diff --git a/drivers/nvdimm/badrange.c b/drivers/nvdimm/badrange.c new file mode 100644 index 000000000000..e068d72b4357 --- /dev/null +++ b/drivers/nvdimm/badrange.c @@ -0,0 +1,293 @@ +/* + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/libnvdimm.h> +#include <linux/badblocks.h> +#include <linux/export.h> +#include <linux/module.h> +#include <linux/blkdev.h> +#include <linux/device.h> +#include <linux/ctype.h> +#include <linux/ndctl.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/io.h> +#include "nd-core.h" +#include "nd.h" + +void badrange_init(struct badrange *badrange) +{ + INIT_LIST_HEAD(&badrange->list); + spin_lock_init(&badrange->lock); +} +EXPORT_SYMBOL_GPL(badrange_init); + +static void append_badrange_entry(struct badrange *badrange, + struct badrange_entry *bre, u64 addr, u64 length) +{ + lockdep_assert_held(&badrange->lock); + bre->start = addr; + bre->length = length; + list_add_tail(&bre->list, &badrange->list); +} + +static int alloc_and_append_badrange_entry(struct badrange *badrange, + u64 addr, u64 length, gfp_t flags) +{ + struct badrange_entry *bre; + + bre = kzalloc(sizeof(*bre), flags); + if (!bre) + return -ENOMEM; + + append_badrange_entry(badrange, bre, addr, length); + return 0; +} + +static int add_badrange(struct badrange *badrange, u64 addr, u64 length) +{ + struct badrange_entry *bre, *bre_new; + + spin_unlock(&badrange->lock); + bre_new = kzalloc(sizeof(*bre_new), GFP_KERNEL); + spin_lock(&badrange->lock); + + if (list_empty(&badrange->list)) { + if (!bre_new) + return -ENOMEM; + append_badrange_entry(badrange, bre_new, addr, length); + return 0; + } + + /* + * There is a chance this is a duplicate, check for those first. 
+ * This will be the common case as ARS_STATUS returns all known + * errors in the SPA space, and we can't query it per region + */ + list_for_each_entry(bre, &badrange->list, list) + if (bre->start == addr) { + /* If length has changed, update this list entry */ + if (bre->length != length) + bre->length = length; + kfree(bre_new); + return 0; + } + + /* + * If not a duplicate or a simple length update, add the entry as is, + * as any overlapping ranges will get resolved when the list is consumed + * and converted to badblocks + */ + if (!bre_new) + return -ENOMEM; + append_badrange_entry(badrange, bre_new, addr, length); + + return 0; +} + +int badrange_add(struct badrange *badrange, u64 addr, u64 length) +{ + int rc; + + spin_lock(&badrange->lock); + rc = add_badrange(badrange, addr, length); + spin_unlock(&badrange->lock); + + return rc; +} +EXPORT_SYMBOL_GPL(badrange_add); + +void badrange_forget(struct badrange *badrange, phys_addr_t start, + unsigned int len) +{ + struct list_head *badrange_list = &badrange->list; + u64 clr_end = start + len - 1; + struct badrange_entry *bre, *next; + + spin_lock(&badrange->lock); + + /* + * [start, clr_end] is the badrange interval being cleared. + * [bre->start, bre_end] is the badrange_list entry we're comparing + * the above interval against. 
The badrange list entry may need + * to be modified (update either start or length), deleted, or + * split into two based on the overlap characteristics + */ + + list_for_each_entry_safe(bre, next, badrange_list, list) { + u64 bre_end = bre->start + bre->length - 1; + + /* Skip intervals with no intersection */ + if (bre_end < start) + continue; + if (bre->start > clr_end) + continue; + /* Delete completely overlapped badrange entries */ + if ((bre->start >= start) && (bre_end <= clr_end)) { + list_del(&bre->list); + kfree(bre); + continue; + } + /* Adjust start point of partially cleared entries */ + if ((start <= bre->start) && (clr_end > bre->start)) { + bre->length -= clr_end - bre->start + 1; + bre->start = clr_end + 1; + continue; + } + /* Adjust bre->length for partial clearing at the tail end */ + if ((bre->start < start) && (bre_end <= clr_end)) { + /* bre->start remains the same */ + bre->length = start - bre->start; + continue; + } + /* + * If clearing in the middle of an entry, we split it into + * two by modifying the current entry to represent one half of + * the split, and adding a new entry for the second half. + */ + if ((bre->start < start) && (bre_end > clr_end)) { + u64 new_start = clr_end + 1; + u64 new_len = bre_end - new_start + 1; + + /* Add new entry covering the right half */ + alloc_and_append_badrange_entry(badrange, new_start, + new |