summaryrefslogtreecommitdiffstats
path: root/drivers/lightnvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 15:32:19 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 15:32:19 -0800
commite2c5923c349c1738fe8fda980874d93f6fb2e5b6 (patch)
treeb97a90170c45211bcc437761653aa8016c34afcd /drivers/lightnvm
parentabc36be236358162202e86ad88616ff95a755101 (diff)
parenta04b5de5050ab8b891128eb2c47a0916fe8622e1 (diff)
Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block
Pull core block layer updates from Jens Axboe: "This is the main pull request for block storage for 4.15-rc1. Nothing out of the ordinary in here, and no API changes or anything like that. Just various new features for drivers, core changes, etc. In particular, this pull request contains: - A patch series from Bart, closing the hole on blk/scsi-mq queue quiescing. - A series from Christoph, building towards hidden gendisks (for multipath) and ability to move bio chains around. - NVMe - Support for native multipath for NVMe (Christoph). - Userspace notifications for AENs (Keith). - Command side-effects support (Keith). - SGL support (Chaitanya Kulkarni) - FC fixes and improvements (James Smart) - Lots of fixes and tweaks (Various) - bcache - New maintainer (Michael Lyle) - Writeback control improvements (Michael) - Various fixes (Coly, Elena, Eric, Liang, et al) - lightnvm updates, mostly centered around the pblk interface (Javier, Hans, and Rakesh). - Removal of unused bio/bvec kmap atomic interfaces (me, Christoph) - Writeback series that fixes the much discussed hundreds of millions of sync-all units. This goes all the way, as discussed previously (me). - Fix for missing wakeup on writeback timer adjustments (Yafang Shao). - Fix laptop mode on blk-mq (me). - {mq,name} tuple lookup for IO schedulers, allowing us to have alias names. This means you can use 'deadline' on both !mq and on mq (where it's called mq-deadline). (me). - blktrace race fix, oopsing on sg load (me). - blk-mq optimizations (me). - Obscure waitqueue race fix for kyber (Omar). - NBD fixes (Josef). - Disable writeback throttling by default on bfq, like we do on cfq (Luca Miccio). - Series from Ming that enables us to treat flush requests on blk-mq like any other request. This is a really nice cleanup. - Series from Ming that improves merging on blk-mq with schedulers, getting us closer to flipping the switch on scsi-mq again. - BFQ updates (Paolo). 
- blk-mq atomic flags memory ordering fixes (Peter Z). - Loop cgroup support (Shaohua). - Lots of minor fixes from lots of different folks, both for core and driver code" * 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits) nvme: fix visibility of "uuid" ns attribute blk-mq: fixup some comment typos and lengths ide: ide-atapi: fix compile error with defining macro DEBUG blk-mq: improve tag waiting setup for non-shared tags brd: remove unused brd_mutex blk-mq: only run the hardware queue if IO is pending block: avoid null pointer dereference on null disk fs: guard_bio_eod() needs to consider partitions xtensa/simdisk: fix compile error nvme: expose subsys attribute to sysfs nvme: create 'slaves' and 'holders' entries for hidden controllers block: create 'slaves' and 'holders' entries for hidden gendisks nvme: also expose the namespace identification sysfs files for mpath nodes nvme: implement multipath access to nvme subsystems nvme: track shared namespaces nvme: introduce a nvme_ns_ids structure nvme: track subsystems block, nvme: Introduce blk_mq_req_flags_t block, scsi: Make SCSI quiesce and resume work reliably block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag ...
Diffstat (limited to 'drivers/lightnvm')
-rw-r--r--drivers/lightnvm/Kconfig3
-rw-r--r--drivers/lightnvm/core.c176
-rw-r--r--drivers/lightnvm/pblk-cache.c24
-rw-r--r--drivers/lightnvm/pblk-core.c512
-rw-r--r--drivers/lightnvm/pblk-gc.c289
-rw-r--r--drivers/lightnvm/pblk-init.c197
-rw-r--r--drivers/lightnvm/pblk-map.c28
-rw-r--r--drivers/lightnvm/pblk-rb.c30
-rw-r--r--drivers/lightnvm/pblk-read.c274
-rw-r--r--drivers/lightnvm/pblk-recovery.c129
-rw-r--r--drivers/lightnvm/pblk-rl.c43
-rw-r--r--drivers/lightnvm/pblk-sysfs.c2
-rw-r--r--drivers/lightnvm/pblk-write.c229
-rw-r--r--drivers/lightnvm/pblk.h132
14 files changed, 1079 insertions, 989 deletions
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index ead61a93cb4e..2a953efec4e1 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -4,7 +4,8 @@
menuconfig NVM
bool "Open-Channel SSD target support"
- depends on BLOCK && HAS_DMA
+ depends on BLOCK && HAS_DMA && PCI
+ select BLK_DEV_NVME
help
Say Y here to get to enable Open-channel SSDs.
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index ddae430b6eae..83249b43dd06 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -22,6 +22,7 @@
#include <linux/types.h>
#include <linux/sem.h>
#include <linux/bitmap.h>
+#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/miscdevice.h>
#include <linux/lightnvm.h>
@@ -138,7 +139,6 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
int prev_nr_luns;
int i, j;
- nr_chnls = nr_luns / dev->geo.luns_per_chnl;
nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1;
dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL);
@@ -226,6 +226,24 @@ static const struct block_device_operations nvm_fops = {
.owner = THIS_MODULE,
};
+static struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock)
+{
+ struct nvm_tgt_type *tmp, *tt = NULL;
+
+ if (lock)
+ down_write(&nvm_tgtt_lock);
+
+ list_for_each_entry(tmp, &nvm_tgt_types, list)
+ if (!strcmp(name, tmp->name)) {
+ tt = tmp;
+ break;
+ }
+
+ if (lock)
+ up_write(&nvm_tgtt_lock);
+ return tt;
+}
+
static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
{
struct nvm_ioctl_create_simple *s = &create->conf.s;
@@ -316,6 +334,8 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
list_add_tail(&t->list, &dev->targets);
mutex_unlock(&dev->mlock);
+ __module_get(tt->owner);
+
return 0;
err_sysfs:
if (tt->exit)
@@ -351,6 +371,7 @@ static void __nvm_remove_target(struct nvm_target *t)
nvm_remove_tgt_dev(t->dev, 1);
put_disk(tdisk);
+ module_put(t->type->owner);
list_del(&t->list);
kfree(t);
@@ -532,25 +553,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
}
EXPORT_SYMBOL(nvm_part_to_tgt);
-struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock)
-{
- struct nvm_tgt_type *tmp, *tt = NULL;
-
- if (lock)
- down_write(&nvm_tgtt_lock);
-
- list_for_each_entry(tmp, &nvm_tgt_types, list)
- if (!strcmp(name, tmp->name)) {
- tt = tmp;
- break;
- }
-
- if (lock)
- up_write(&nvm_tgtt_lock);
- return tt;
-}
-EXPORT_SYMBOL(nvm_find_target_type);
-
int nvm_register_tgt_type(struct nvm_tgt_type *tt)
{
int ret = 0;
@@ -571,9 +573,9 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
if (!tt)
return;
- down_write(&nvm_lock);
+ down_write(&nvm_tgtt_lock);
list_del(&tt->list);
- up_write(&nvm_lock);
+ up_write(&nvm_tgtt_lock);
}
EXPORT_SYMBOL(nvm_unregister_tgt_type);
@@ -602,6 +604,52 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name)
return NULL;
}
+static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
+ const struct ppa_addr *ppas, int nr_ppas)
+{
+ struct nvm_dev *dev = tgt_dev->parent;
+ struct nvm_geo *geo = &tgt_dev->geo;
+ int i, plane_cnt, pl_idx;
+ struct ppa_addr ppa;
+
+ if (geo->plane_mode == NVM_PLANE_SINGLE && nr_ppas == 1) {
+ rqd->nr_ppas = nr_ppas;
+ rqd->ppa_addr = ppas[0];
+
+ return 0;
+ }
+
+ rqd->nr_ppas = nr_ppas;
+ rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
+ if (!rqd->ppa_list) {
+ pr_err("nvm: failed to allocate dma memory\n");
+ return -ENOMEM;
+ }
+
+ plane_cnt = geo->plane_mode;
+ rqd->nr_ppas *= plane_cnt;
+
+ for (i = 0; i < nr_ppas; i++) {
+ for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) {
+ ppa = ppas[i];
+ ppa.g.pl = pl_idx;
+ rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa;
+ }
+ }
+
+ return 0;
+}
+
+static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
+ struct nvm_rq *rqd)
+{
+ if (!rqd->ppa_list)
+ return;
+
+ nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+}
+
+
int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
int nr_ppas, int type)
{
@@ -616,7 +664,7 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
memset(&rqd, 0, sizeof(struct nvm_rq));
- nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1);
+ nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas);
nvm_rq_tgt_to_dev(tgt_dev, &rqd);
ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
@@ -658,12 +706,25 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
}
EXPORT_SYMBOL(nvm_submit_io);
-static void nvm_end_io_sync(struct nvm_rq *rqd)
+int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
{
- struct completion *waiting = rqd->private;
+ struct nvm_dev *dev = tgt_dev->parent;
+ int ret;
- complete(waiting);
+ if (!dev->ops->submit_io_sync)
+ return -ENODEV;
+
+ nvm_rq_tgt_to_dev(tgt_dev, rqd);
+
+ rqd->dev = tgt_dev;
+
+ /* In case of error, fail with right address format */
+ ret = dev->ops->submit_io_sync(dev, rqd);
+ nvm_rq_dev_to_tgt(tgt_dev, rqd);
+
+ return ret;
}
+EXPORT_SYMBOL(nvm_submit_io_sync);
int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
int nr_ppas)
@@ -671,25 +732,21 @@ int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
struct nvm_geo *geo = &tgt_dev->geo;
struct nvm_rq rqd;
int ret;
- DECLARE_COMPLETION_ONSTACK(wait);
memset(&rqd, 0, sizeof(struct nvm_rq));
rqd.opcode = NVM_OP_ERASE;
- rqd.end_io = nvm_end_io_sync;
- rqd.private = &wait;
rqd.flags = geo->plane_mode >> 1;
- ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1);
+ ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas);
if (ret)
return ret;
- ret = nvm_submit_io(tgt_dev, &rqd);
+ ret = nvm_submit_io_sync(tgt_dev, &rqd);
if (ret) {
pr_err("rrpr: erase I/O submission failed: %d\n", ret);
goto free_ppa_list;
}
- wait_for_completion_io(&wait);
free_ppa_list:
nvm_free_rqd_ppalist(tgt_dev, &rqd);
@@ -775,57 +832,6 @@ void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin)
}
EXPORT_SYMBOL(nvm_put_area);
-int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
- const struct ppa_addr *ppas, int nr_ppas, int vblk)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- struct nvm_geo *geo = &tgt_dev->geo;
- int i, plane_cnt, pl_idx;
- struct ppa_addr ppa;
-
- if ((!vblk || geo->plane_mode == NVM_PLANE_SINGLE) && nr_ppas == 1) {
- rqd->nr_ppas = nr_ppas;
- rqd->ppa_addr = ppas[0];
-
- return 0;
- }
-
- rqd->nr_ppas = nr_ppas;
- rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
- if (!rqd->ppa_list) {
- pr_err("nvm: failed to allocate dma memory\n");
- return -ENOMEM;
- }
-
- if (!vblk) {
- for (i = 0; i < nr_ppas; i++)
- rqd->ppa_list[i] = ppas[i];
- } else {
- plane_cnt = geo->plane_mode;
- rqd->nr_ppas *= plane_cnt;
-
- for (i = 0; i < nr_ppas; i++) {
- for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) {
- ppa = ppas[i];
- ppa.g.pl = pl_idx;
- rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa;
- }
- }
- }
-
- return 0;
-}
-EXPORT_SYMBOL(nvm_set_rqd_ppalist);
-
-void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
-{
- if (!rqd->ppa_list)
- return;
-
- nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
-}
-EXPORT_SYMBOL(nvm_free_rqd_ppalist);
-
void nvm_end_io(struct nvm_rq *rqd)
{
struct nvm_tgt_dev *tgt_dev = rqd->dev;
@@ -1177,7 +1183,7 @@ static long nvm_ioctl_info(struct file *file, void __user *arg)
info->version[1] = NVM_VERSION_MINOR;
info->version[2] = NVM_VERSION_PATCH;
- down_write(&nvm_lock);
+ down_write(&nvm_tgtt_lock);
list_for_each_entry(tt, &nvm_tgt_types, list) {
struct nvm_ioctl_info_tgt *tgt = &info->tgts[tgt_iter];
@@ -1190,7 +1196,7 @@ static long nvm_ioctl_info(struct file *file, void __user *arg)
}
info->tgtsize = tgt_iter;
- up_write(&nvm_lock);
+ up_write(&nvm_tgtt_lock);
if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) {
kfree(info);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 024a8fc93069..0d227ef7d1b9 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -43,8 +43,10 @@ retry:
if (unlikely(!bio_has_data(bio)))
goto out;
- w_ctx.flags = flags;
pblk_ppa_set_empty(&w_ctx.ppa);
+ w_ctx.flags = flags;
+ if (bio->bi_opf & REQ_PREFLUSH)
+ w_ctx.flags |= PBLK_FLUSH_ENTRY;
for (i = 0; i < nr_entries; i++) {
void *data = bio_data(bio);
@@ -73,12 +75,11 @@ out:
* On GC the incoming lbas are not necessarily sequential. Also, some of the
* lbas might not be valid entries, which are marked as empty by the GC thread
*/
-int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list,
- unsigned int nr_entries, unsigned int nr_rec_entries,
- struct pblk_line *gc_line, unsigned long flags)
+int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
struct pblk_w_ctx w_ctx;
unsigned int bpos, pos;
+ void *data = gc_rq->data;
int i, valid_entries;
/* Update the write buffer head (mem) with the entries that we can
@@ -86,28 +87,29 @@ int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list,
* rollback from here on.
*/
retry:
- if (!pblk_rb_may_write_gc(&pblk->rwb, nr_rec_entries, &bpos)) {
+ if (!pblk_rb_may_write_gc(&pblk->rwb, gc_rq->secs_to_gc, &bpos)) {
io_schedule();
goto retry;
}
- w_ctx.flags = flags;
+ w_ctx.flags = PBLK_IOTYPE_GC;
pblk_ppa_set_empty(&w_ctx.ppa);
- for (i = 0, valid_entries = 0; i < nr_entries; i++) {
- if (lba_list[i] == ADDR_EMPTY)
+ for (i = 0, valid_entries = 0; i < gc_rq->nr_secs; i++) {
+ if (gc_rq->lba_list[i] == ADDR_EMPTY)
continue;
- w_ctx.lba = lba_list[i];
+ w_ctx.lba = gc_rq->lba_list[i];
pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries);
- pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_line, pos);
+ pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_rq->line,
+ gc_rq->paddr_list[i], pos);
data += PBLK_EXPOSED_PAGE_SIZE;
valid_entries++;
}
- WARN_ONCE(nr_rec_entries != valid_entries,
+ WARN_ONCE(gc_rq->secs_to_gc != valid_entries,
"pblk: inconsistent GC write\n");
#ifdef CONFIG_NVM_DEBUG
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 81501644fb15..ce90213a42fa 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -18,6 +18,31 @@
#include "pblk.h"
+static void pblk_line_mark_bb(struct work_struct *work)
+{
+ struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
+ ws);
+ struct pblk *pblk = line_ws->pblk;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct ppa_addr *ppa = line_ws->priv;
+ int ret;
+
+ ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
+ if (ret) {
+ struct pblk_line *line;
+ int pos;
+
+ line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)];
+ pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa);
+
+ pr_err("pblk: failed to mark bb, line:%d, pos:%d\n",
+ line->id, pos);
+ }
+
+ kfree(ppa);
+ mempool_free(line_ws, pblk->gen_ws_pool);
+}
+
static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
struct ppa_addr *ppa)
{
@@ -33,7 +58,8 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
line->id, pos);
- pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq);
+ pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb,
+ GFP_ATOMIC, pblk->bb_wq);
}
static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
@@ -63,7 +89,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
struct pblk *pblk = rqd->private;
__pblk_end_io_erase(pblk, rqd);
- mempool_free(rqd, pblk->g_rq_pool);
+ mempool_free(rqd, pblk->e_rq_pool);
}
void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
@@ -77,11 +103,7 @@ void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
* that newer updates are not overwritten.
*/
spin_lock(&line->lock);
- if (line->state == PBLK_LINESTATE_GC ||
- line->state == PBLK_LINESTATE_FREE) {
- spin_unlock(&line->lock);
- return;
- }
+ WARN_ON(line->state == PBLK_LINESTATE_FREE);
if (test_and_set_bit(paddr, line->invalid_bitmap)) {
WARN_ONCE(1, "pblk: double invalidate\n");
@@ -98,8 +120,7 @@ void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
spin_lock(&l_mg->gc_lock);
spin_lock(&line->lock);
/* Prevent moving a line that has just been chosen for GC */
- if (line->state == PBLK_LINESTATE_GC ||
- line->state == PBLK_LINESTATE_FREE) {
+ if (line->state == PBLK_LINESTATE_GC) {
spin_unlock(&line->lock);
spin_unlock(&l_mg->gc_lock);
return;
@@ -150,17 +171,25 @@ static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
spin_unlock(&pblk->trans_lock);
}
-struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
+/* Caller must guarantee that the request is a valid type */
+struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type)
{
mempool_t *pool;
struct nvm_rq *rqd;
int rq_size;
- if (rw == WRITE) {
+ switch (type) {
+ case PBLK_WRITE:
+ case PBLK_WRITE_INT:
pool = pblk->w_rq_pool;
rq_size = pblk_w_rq_size;
- } else {
- pool = pblk->g_rq_pool;
+ break;
+ case PBLK_READ:
+ pool = pblk->r_rq_pool;
+ rq_size = pblk_g_rq_size;
+ break;
+ default:
+ pool = pblk->e_rq_pool;
rq_size = pblk_g_rq_size;
}
@@ -170,15 +199,30 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
return rqd;
}
-void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
+/* Typically used on completion path. Cannot guarantee request consistency */
+void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type)
{
+ struct nvm_tgt_dev *dev = pblk->dev;
mempool_t *pool;
- if (rw == WRITE)
+ switch (type) {
+ case PBLK_WRITE:
+ kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap);
+ case PBLK_WRITE_INT:
pool = pblk->w_rq_pool;
- else
- pool = pblk->g_rq_pool;
+ break;
+ case PBLK_READ:
+ pool = pblk->r_rq_pool;
+ break;
+ case PBLK_ERASE:
+ pool = pblk->e_rq_pool;
+ break;
+ default:
+ pr_err("pblk: trying to free unknown rqd type\n");
+ return;
+ }
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
mempool_free(rqd, pool);
}
@@ -190,10 +234,9 @@ void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
WARN_ON(off + nr_pages != bio->bi_vcnt);
- bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE);
for (i = off; i < nr_pages + off; i++) {
bv = bio->bi_io_vec[i];
- mempool_free(bv.bv_page, pblk->page_pool);
+ mempool_free(bv.bv_page, pblk->page_bio_pool);
}
}
@@ -205,14 +248,12 @@ int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
int i, ret;
for (i = 0; i < nr_pages; i++) {
- page = mempool_alloc(pblk->page_pool, flags);
- if (!page)
- goto err;
+ page = mempool_alloc(pblk->page_bio_pool, flags);
ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
if (ret != PBLK_EXPOSED_PAGE_SIZE) {
pr_err("pblk: could not add page to bio\n");
- mempool_free(page, pblk->page_pool);
+ mempool_free(page, pblk->page_bio_pool);
goto err;
}
}
@@ -245,13 +286,6 @@ void pblk_write_should_kick(struct pblk *pblk)
pblk_write_kick(pblk);
}
-void pblk_end_bio_sync(struct bio *bio)
-{
- struct completion *waiting = bio->bi_private;
-
- complete(waiting);
-}
-
void pblk_end_io_sync(struct nvm_rq *rqd)
{
struct completion *waiting = rqd->private;
@@ -259,7 +293,7 @@ void pblk_end_io_sync(struct nvm_rq *rqd)
complete(waiting);
}
-void pblk_wait_for_meta(struct pblk *pblk)
+static void pblk_wait_for_meta(struct pblk *pblk)
{
do {
if (!atomic_read(&pblk->inflight_io))
@@ -336,17 +370,6 @@ void pblk_discard(struct pblk *pblk, struct bio *bio)
pblk_invalidate_range(pblk, slba, nr_secs);
}
-struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba)
-{
- struct ppa_addr ppa;
-
- spin_lock(&pblk->trans_lock);
- ppa = pblk_trans_map_get(pblk, lba);
- spin_unlock(&pblk->trans_lock);
-
- return ppa;
-}
-
void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd)
{
atomic_long_inc(&pblk->write_failed);
@@ -389,39 +412,38 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
struct nvm_tgt_dev *dev = pblk->dev;
#ifdef CONFIG_NVM_DEBUG
- struct ppa_addr *ppa_list;
+ int ret;
- ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
- if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
- WARN_ON(1);
- return -EINVAL;
- }
+ ret = pblk_check_io(pblk, rqd);
+ if (ret)
+ return ret;
+#endif
- if (rqd->opcode == NVM_OP_PWRITE) {
- struct pblk_line *line;
- struct ppa_addr ppa;
- int i;
+ atomic_inc(&pblk->inflight_io);
- for (i = 0; i < rqd->nr_ppas; i++) {
- ppa = ppa_list[i];
- line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];
+ return nvm_submit_io(dev, rqd);
+}
- spin_lock(&line->lock);
- if (line->state != PBLK_LINESTATE_OPEN) {
- pr_err("pblk: bad ppa: line:%d,state:%d\n",
- line->id, line->state);
- WARN_ON(1);
- spin_unlock(&line->lock);
- return -EINVAL;
- }
- spin_unlock(&line->lock);
- }
- }
+int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+
+#ifdef CONFIG_NVM_DEBUG
+ int ret;
+
+ ret = pblk_check_io(pblk, rqd);
+ if (ret)
+ return ret;
#endif
atomic_inc(&pblk->inflight_io);
- return nvm_submit_io(dev, rqd);
+ return nvm_submit_io_sync(dev, rqd);
+}
+
+static void pblk_bio_map_addr_endio(struct bio *bio)
+{
+ bio_put(bio);
}
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
@@ -460,6 +482,8 @@ struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
kaddr += PAGE_SIZE;
}
+
+ bio->bi_end_io = pblk_bio_map_addr_endio;
out:
return bio;
}
@@ -486,12 +510,14 @@ void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
u64 addr;
int i;
+ spin_lock(&line->lock);
addr = find_next_zero_bit(line->map_bitmap,
pblk->lm.sec_per_line, line->cur_sec);
line->cur_sec = addr - nr_secs;
for (i = 0; i < nr_secs; i++, line->cur_sec--)
WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
+ spin_unlock(&line->lock);
}
u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
@@ -565,12 +591,11 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
int cmd_op, bio_op;
int i, j;
int ret;
- DECLARE_COMPLETION_ONSTACK(wait);
- if (dir == WRITE) {
+ if (dir == PBLK_WRITE) {
bio_op = REQ_OP_WRITE;
cmd_op = NVM_OP_PWRITE;
- } else if (dir == READ) {
+ } else if (dir == PBLK_READ) {
bio_op = REQ_OP_READ;
cmd_op = NVM_OP_PREAD;
} else
@@ -607,13 +632,11 @@ next_rq:
rqd.dma_ppa_list = dma_ppa_list;
rqd.opcode = cmd_op;
rqd.nr_ppas = rq_ppas;
- rqd.end_io = pblk_end_io_sync;
- rqd.private = &wait;
- if (dir == WRITE) {
+ if (dir == PBLK_WRITE) {
struct pblk_sec_meta *meta_list = rqd.meta_list;
- rqd.flags = pblk_set_progr_mode(pblk, WRITE);
+ rqd.flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
for (i = 0; i < rqd.nr_ppas; ) {
spin_lock(&line->lock);
paddr = __pblk_alloc_page(pblk, line, min);
@@ -662,25 +685,17 @@ next_rq:
}
}
- ret = pblk_submit_io(pblk, &rqd);
+ ret = pblk_submit_io_sync(pblk, &rqd);
if (ret) {
pr_err("pblk: emeta I/O submission failed: %d\n", ret);
bio_put(bio);
goto free_rqd_dma;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: emeta I/O timed out\n");
- }
atomic_dec(&pblk->inflight_io);
- reinit_completion(&wait);
-
- if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
- bio_put(bio);
if (rqd.error) {
- if (dir == WRITE)
+ if (dir == PBLK_WRITE)
pblk_log_write_err(pblk, &rqd);
else
pblk_log_read_err(pblk, &rqd);
@@ -721,14 +736,13 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
int i, ret;
int cmd_op, bio_op;
int flags;
- DECLARE_COMPLETION_ONSTACK(wait);
- if (dir == WRITE) {
+ if (dir == PBLK_WRITE) {
bio_op = REQ_OP_WRITE;
cmd_op = NVM_OP_PWRITE;
- flags = pblk_set_progr_mode(pblk, WRITE);
+ flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
lba_list = emeta_to_lbas(pblk, line->emeta->buf);
- } else if (dir == READ) {
+ } else if (dir == PBLK_READ) {
bio_op = REQ_OP_READ;
cmd_op = NVM_OP_PREAD;
flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
@@ -758,15 +772,13 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
rqd.opcode = cmd_op;
rqd.flags = flags;
rqd.nr_ppas = lm->smeta_sec;
- rqd.end_io = pblk_end_io_sync;
- rqd.private = &wait;
for (i = 0; i < lm->smeta_sec; i++, paddr++) {
struct pblk_sec_meta *meta_list = rqd.meta_list;
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
- if (dir == WRITE) {
+ if (dir == PBLK_WRITE) {
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
meta_list[i].lba = lba_list[paddr] = addr_empty;
@@ -778,21 +790,17 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
* the write thread is the only one sending write and erase commands,
* there is no need to take the LUN semaphore.
*/
- ret = pblk_submit_io(pblk, &rqd);
+ ret = pblk_submit_io_sync(pblk, &rqd);
if (ret) {
pr_err("pblk: smeta I/O submission failed: %d\n", ret);
bio_put(bio);
goto free_ppa_list;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: smeta I/O timed out\n");
- }
atomic_dec(&pblk->inflight_io);
if (rqd.error) {
- if (dir == WRITE)
+ if (dir == PBLK_WRITE)
pblk_log_write_err(pblk, &rqd);
else
pblk_log_read_err(pblk, &rqd);
@@ -808,14 +816,14 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
{
u64 bpaddr = pblk_line_smeta_start(pblk, line);
- return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
+ return pblk_line_submit_smeta_io(pblk, line, bpaddr, PBLK_READ);
}
int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
void *emeta_buf)
{
return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
- line->emeta_ssec, READ);
+ line->emeta_ssec, PBLK_READ);
}
static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -824,7 +832,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
rqd->opcode = NVM_OP_ERASE;
rqd->ppa_addr = ppa;
rqd->nr_ppas = 1;
- rqd->flags = pblk_set_progr_mode(pblk, ERASE);
+ rqd->flags = pblk_set_progr_mode(pblk, PBLK_ERASE);
rqd->bio = NULL;
}
@@ -832,19 +840,15 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
{
struct nvm_rq rqd;
int ret = 0;
- DECLARE_COMPLETION_ONSTACK(wait);
memset(&rqd, 0, sizeof(struct nvm_rq));
pblk_setup_e_rq(pblk, &rqd, ppa);
- rqd.end_io = pblk_end_io_sync;
- rqd.private = &wait;
-
/* The write thread schedules erases so that it minimizes disturbances
* with writes. Thus, there is no need to take the LUN semaphore.
*/
- ret = pblk_submit_io(pblk, &rqd);
+ ret = pblk_submit_io_sync(pblk, &rqd);
if (ret) {
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
@@ -857,11 +861,6 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
goto out;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: sync erase timed out\n");
- }
-
out:
rqd.private = pblk;
__pblk_end_io_erase(pblk, &rqd);
@@ -976,7 +975,7 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
smeta_buf->header.id = cpu_to_le32(line->id);
smeta_buf->header.type = cpu_to_le16(line->type);
- smeta_buf->header.version = cpu_to_le16(1);
+ smeta_buf->header.version = SMETA_VERSION;
/* Start metadata */
smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
@@ -1046,7 +1045,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
line->smeta_ssec = off;
line->cur_sec = off + lm->smeta_sec;
- if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
+ if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) {
pr_debug("pblk: line smeta I/O failed. Retry\n");
return 1;
}
@@ -1056,7 +1055,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
/* Mark emeta metadata sectors as bad sectors. We need to consider bad
* blocks to make sure that there are enough sectors to store emeta
*/
- bit = lm->sec_per_line;
off = lm->sec_per_line - lm->emeta_sec[0];
bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
while (nr_bb) {
@@ -1093,25 +1091,21 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
int blk_in_line = atomic_read(&line->blk_in_line);
- line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
+ line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC);
if (!line->map_bitmap)
return -ENOMEM;
- memset(line->map_bitmap, 0, lm->sec_bitmap_len);
- /* invalid_bitmap is special since it is used when line is closed. No
- * need to zeroized; it will be initialized using bb info form
- * map_bitmap
- */
- line->invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
+ /* will be initialized using bb info from map_bitmap */
+ line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_ATOMIC);
if (!line->invalid_bitmap) {
- mempool_free(line->map_bitmap, pblk->line_meta_pool);
+ kfree(line->map_bitmap);
return -ENOMEM;
}
spin_lock(&line->lock);
if (line->state != PBLK_LINESTATE_FREE) {
- mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
- mempool_free(line->map_bitmap, pblk->line_meta_pool);
+ kfree(line->map_bitmap);
+ kfree(line->invalid_bitmap);
spin_unlock(&line->lock);
WARN(1, "pblk: corrupted line %d, state %d\n",
line->id, line->state);
@@ -1163,7 +1157,7 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
{
- mempool_free(line->map_bitmap, pblk->line_meta_pool);
+ kfree(line->map_bitmap);
line->map_bitmap = NULL;
line->smeta = NULL;
line->emeta = NULL;
@@ -1328,6 +1322,41 @@ static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
pblk->state = PBLK_STATE_STOPPING;
}
+static void pblk_line_close