summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-16 19:08:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-16 19:08:15 -0700
commit1718de78e6235c04ecb7f87a6875fdf90aafe382 (patch)
treea8b5c2f89bd2c71bd5b1dc47a0fa46446ba2cd0f
parent815d469d8c9a3360ee0a8b7857dd95352a6c7bde (diff)
parent7a102d9044e720ac887c0cd82b6d5cad236f6d71 (diff)
Merge tag 'for-5.2/block-post-20190516' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe: "This is mainly some late lightnvm changes that came in just before the merge window, as well as fixes that have been queued up since the initial pull request was frozen. This contains: - lightnvm changes, fixing race conditions, improving memory utilization, and improving pblk compatability (Chansol, Igor, Marcin) - NVMe pull request with minor fixes all over the map (via Christoph) - remove redundant error print in sata_rcar (Geert) - struct_size() cleanup (Jackie) - dasd CONFIG_LBADF warning fix (Ming) - brd cond_resched() improvement (Mikulas)" * tag 'for-5.2/block-post-20190516' of git://git.kernel.dk/linux-block: (41 commits) block/bio-integrity: use struct_size() in kmalloc() nvme: validate cntlid during controller initialisation nvme: change locking for the per-subsystem controller list nvme: trace all async notice events nvme: fix typos in nvme status code values nvme-fabrics: remove unused argument nvme-multipath: avoid crash on invalid subsystem cntlid enumeration nvme-fc: use separate work queue to avoid warning nvme-rdma: remove redundant reference between ib_device and tagset nvme-pci: mark expected switch fall-through nvme-pci: add known admin effects to augument admin effects log page nvme-pci: init shadow doorbell after each reset brd: add cond_resched to brd_free_pages sata_rcar: Remove ata_host_alloc() error printing s390/dasd: fix build warning in dasd_eckd_build_cp_raw lightnvm: pblk: use nvm_rq_to_ppa_list() lightnvm: pblk: simplify partial read path lightnvm: do not remove instance under global lock lightnvm: track inflight target creations lightnvm: pblk: recover only written metadata ...
-rw-r--r--block/bio-integrity.c3
-rw-r--r--drivers/ata/sata_rcar.c1
-rw-r--r--drivers/block/brd.c6
-rw-r--r--drivers/lightnvm/core.c82
-rw-r--r--drivers/lightnvm/pblk-cache.c8
-rw-r--r--drivers/lightnvm/pblk-core.c65
-rw-r--r--drivers/lightnvm/pblk-gc.c52
-rw-r--r--drivers/lightnvm/pblk-init.c65
-rw-r--r--drivers/lightnvm/pblk-map.c1
-rw-r--r--drivers/lightnvm/pblk-rb.c13
-rw-r--r--drivers/lightnvm/pblk-read.c394
-rw-r--r--drivers/lightnvm/pblk-recovery.c74
-rw-r--r--drivers/lightnvm/pblk-write.c1
-rw-r--r--drivers/lightnvm/pblk.h28
-rw-r--r--drivers/nvme/host/core.c79
-rw-r--r--drivers/nvme/host/fabrics.c4
-rw-r--r--drivers/nvme/host/fc.c14
-rw-r--r--drivers/nvme/host/lightnvm.c1
-rw-r--r--drivers/nvme/host/multipath.c2
-rw-r--r--drivers/nvme/host/pci.c4
-rw-r--r--drivers/nvme/host/rdma.c34
-rw-r--r--drivers/nvme/host/trace.h1
-rw-r--r--drivers/s390/block/dasd_eckd.c2
-rw-r--r--include/linux/lightnvm.h2
-rw-r--r--include/linux/nvme.h4
25 files changed, 398 insertions, 542 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 42536674020a..4db620849515 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -43,8 +43,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
unsigned inline_vecs;
if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
- bip = kmalloc(sizeof(struct bio_integrity_payload) +
- sizeof(struct bio_vec) * nr_vecs, gfp_mask);
+ bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask);
inline_vecs = nr_vecs;
} else {
bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 59b2317acea9..3495e1733a8e 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -909,7 +909,6 @@ static int sata_rcar_probe(struct platform_device *pdev)
host = ata_host_alloc(dev, 1);
if (!host) {
- dev_err(dev, "ata_host_alloc failed\n");
ret = -ENOMEM;
goto err_pm_put;
}
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 17defbf4f332..2da615b45b31 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -153,6 +153,12 @@ static void brd_free_pages(struct brd_device *brd)
pos++;
/*
+ * It takes 3.4 seconds to remove 80GiB ramdisk.
+ * So, we need cond_resched to avoid stalling the CPU.
+ */
+ cond_resched();
+
+ /*
* This assumes radix_tree_gang_lookup always returns as
* many pages as possible. If the radix-tree code changes,
* so will this have to.
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 5f82036fe322..0df7454832ef 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -45,6 +45,8 @@ struct nvm_dev_map {
int num_ch;
};
+static void nvm_free(struct kref *ref);
+
static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name)
{
struct nvm_target *tgt;
@@ -325,6 +327,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
struct nvm_target *t;
struct nvm_tgt_dev *tgt_dev;
void *targetdata;
+ unsigned int mdts;
int ret;
switch (create->conf.type) {
@@ -412,8 +415,12 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
tdisk->private_data = targetdata;
tqueue->queuedata = targetdata;
- blk_queue_max_hw_sectors(tqueue,
- (dev->geo.csecs >> 9) * NVM_MAX_VLBA);
+ mdts = (dev->geo.csecs >> 9) * NVM_MAX_VLBA;
+ if (dev->geo.mdts) {
+ mdts = min_t(u32, dev->geo.mdts,
+ (dev->geo.csecs >> 9) * NVM_MAX_VLBA);
+ }
+ blk_queue_max_hw_sectors(tqueue, mdts);
set_capacity(tdisk, tt->capacity(targetdata));
add_disk(tdisk);
@@ -476,7 +483,6 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful)
/**
* nvm_remove_tgt - Removes a target from the media manager
- * @dev: device
* @remove: ioctl structure with target name to remove.
*
* Returns:
@@ -484,18 +490,28 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful)
* 1: on not found
* <0: on error
*/
-static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove)
+static int nvm_remove_tgt(struct nvm_ioctl_remove *remove)
{
struct nvm_target *t;
+ struct nvm_dev *dev;
- mutex_lock(&dev->mlock);
- t = nvm_find_target(dev, remove->tgtname);
- if (!t) {
+ down_read(&nvm_lock);
+ list_for_each_entry(dev, &nvm_devices, devices) {
+ mutex_lock(&dev->mlock);
+ t = nvm_find_target(dev, remove->tgtname);
+ if (t) {
+ mutex_unlock(&dev->mlock);
+ break;
+ }
mutex_unlock(&dev->mlock);
- return 1;
}
+ up_read(&nvm_lock);
+
+ if (!t)
+ return 1;
+
__nvm_remove_target(t, true);
- mutex_unlock(&dev->mlock);
+ kref_put(&dev->ref, nvm_free);
return 0;
}
@@ -1089,15 +1105,16 @@ err_fmtype:
return ret;
}
-static void nvm_free(struct nvm_dev *dev)
+static void nvm_free(struct kref *ref)
{
- if (!dev)
- return;
+ struct nvm_dev *dev = container_of(ref, struct nvm_dev, ref);
if (dev->dma_pool)
dev->ops->destroy_dma_pool(dev->dma_pool);
- nvm_unregister_map(dev);
+ if (dev->rmap)
+ nvm_unregister_map(dev);
+
kfree(dev->lun_map);
kfree(dev);
}
@@ -1134,7 +1151,13 @@ err:
struct nvm_dev *nvm_alloc_dev(int node)
{
- return kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node);
+ struct nvm_dev *dev;
+
+ dev = kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node);
+ if (dev)
+ kref_init(&dev->ref);
+
+ return dev;
}
EXPORT_SYMBOL(nvm_alloc_dev);
@@ -1142,12 +1165,16 @@ int nvm_register(struct nvm_dev *dev)
{
int ret, exp_pool_size;
- if (!dev->q || !dev->ops)
+ if (!dev->q || !dev->ops) {
+ kref_put(&dev->ref, nvm_free);
return -EINVAL;
+ }
ret = nvm_init(dev);
- if (ret)
+ if (ret) {
+ kref_put(&dev->ref, nvm_free);
return ret;
+ }
exp_pool_size = max_t(int, PAGE_SIZE,
(NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos)));
@@ -1157,7 +1184,7 @@ int nvm_register(struct nvm_dev *dev)
exp_pool_size);
if (!dev->dma_pool) {
pr_err("nvm: could not create dma pool\n");
- nvm_free(dev);
+ kref_put(&dev->ref, nvm_free);
return -ENOMEM;
}
@@ -1179,6 +1206,7 @@ void nvm_unregister(struct nvm_dev *dev)
if (t->dev->parent != dev)
continue;
__nvm_remove_target(t, false);
+ kref_put(&dev->ref, nvm_free);
}
mutex_unlock(&dev->mlock);
@@ -1186,13 +1214,14 @@ void nvm_unregister(struct nvm_dev *dev)
list_del(&dev->devices);
up_write(&nvm_lock);
- nvm_free(dev);
+ kref_put(&dev->ref, nvm_free);
}
EXPORT_SYMBOL(nvm_unregister);
static int __nvm_configure_create(struct nvm_ioctl_create *create)
{
struct nvm_dev *dev;
+ int ret;
down_write(&nvm_lock);
dev = nvm_find_nvm_dev(create->dev);
@@ -1203,7 +1232,12 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create)
return -EINVAL;
}
- return nvm_create_tgt(dev, create);
+ kref_get(&dev->ref);
+ ret = nvm_create_tgt(dev, create);
+ if (ret)
+ kref_put(&dev->ref, nvm_free);
+
+ return ret;
}
static long nvm_ioctl_info(struct file *file, void __user *arg)
@@ -1322,8 +1356,6 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
{
struct nvm_ioctl_remove remove;
- struct nvm_dev *dev;
- int ret = 0;
if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove)))
return -EFAULT;
@@ -1335,13 +1367,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
return -EINVAL;
}
- list_for_each_entry(dev, &nvm_devices, devices) {
- ret = nvm_remove_tgt(dev, &remove);
- if (!ret)
- break;
- }
-
- return ret;
+ return nvm_remove_tgt(&remove);
}
/* kept for compatibility reasons */
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index c9fa26f95659..5c1034c22197 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -18,7 +18,8 @@
#include "pblk.h"
-int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
+void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
+ unsigned long flags)
{
struct request_queue *q = pblk->dev->q;
struct pblk_w_ctx w_ctx;
@@ -43,6 +44,7 @@ retry:
goto retry;
case NVM_IO_ERR:
pblk_pipeline_stop(pblk);
+ bio_io_error(bio);
goto out;
}
@@ -79,7 +81,9 @@ retry:
out:
generic_end_io_acct(q, REQ_OP_WRITE, &pblk->disk->part0, start_time);
pblk_write_should_kick(pblk);
- return ret;
+
+ if (ret == NVM_IO_DONE)
+ bio_endio(bio);
}
/*
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 6ca868868fee..773537804319 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -562,11 +562,9 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
{
- struct ppa_addr *ppa_list;
+ struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
int ret;
- ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
-
pblk_down_chunk(pblk, ppa_list[0]);
ret = pblk_submit_io_sync(pblk, rqd);
pblk_up_chunk(pblk, ppa_list[0]);
@@ -725,6 +723,7 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
struct nvm_tgt_dev *dev = pblk->dev;
struct pblk_line_meta *lm = &pblk->lm;
struct bio *bio;
+ struct ppa_addr *ppa_list;
struct nvm_rq rqd;
u64 paddr = pblk_line_smeta_start(pblk, line);
int i, ret;
@@ -748,9 +747,10 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
rqd.opcode = NVM_OP_PREAD;
rqd.nr_ppas = lm->smeta_sec;
rqd.is_seq = 1;
+ ppa_list = nvm_rq_to_ppa_list(&rqd);
for (i = 0; i < lm->smeta_sec; i++, paddr++)
- rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
+ ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
ret = pblk_submit_io_sync(pblk, &rqd);
if (ret) {
@@ -761,8 +761,10 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
atomic_dec(&pblk->inflight_io);
- if (rqd.error)
+ if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) {
pblk_log_read_err(pblk, &rqd);
+ ret = -EIO;
+ }
clear_rqd:
pblk_free_rqd_meta(pblk, &rqd);
@@ -775,6 +777,7 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
struct nvm_tgt_dev *dev = pblk->dev;
struct pblk_line_meta *lm = &pblk->lm;
struct bio *bio;
+ struct ppa_addr *ppa_list;
struct nvm_rq rqd;
__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
@@ -799,12 +802,13 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
rqd.opcode = NVM_OP_PWRITE;
rqd.nr_ppas = lm->smeta_sec;
rqd.is_seq = 1;
+ ppa_list = nvm_rq_to_ppa_list(&rqd);
for (i = 0; i < lm->smeta_sec; i++, paddr++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk,
rqd.meta_list, i);
- rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
+ ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
meta->lba = lba_list[paddr] = addr_empty;
}
@@ -834,8 +838,9 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
struct nvm_geo *geo = &dev->geo;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- void *ppa_list, *meta_list;
+ void *ppa_list_buf, *meta_list;
struct bio *bio;
+ struct ppa_addr *ppa_list;
struct nvm_rq rqd;
u64 paddr = line->emeta_ssec;
dma_addr_t dma_ppa_list, dma_meta_list;
@@ -851,7 +856,7 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
if (!meta_list)
return -ENOMEM;
- ppa_list = meta_list + pblk_dma_meta_size(pblk);
+ ppa_list_buf = meta_list + pblk_dma_meta_size(pblk);
dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
next_rq:
@@ -872,11 +877,12 @@ next_rq:
rqd.bio = bio;
rqd.meta_list = meta_list;
- rqd.ppa_list = ppa_list;
+ rqd.ppa_list = ppa_list_buf;
rqd.dma_meta_list = dma_meta_list;
rqd.dma_ppa_list = dma_ppa_list;
rqd.opcode = NVM_OP_PREAD;
rqd.nr_ppas = rq_ppas;
+ ppa_list = nvm_rq_to_ppa_list(&rqd);
for (i = 0; i < rqd.nr_ppas; ) {
struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id);
@@ -904,7 +910,7 @@ next_rq:
}
for (j = 0; j < min; j++, i++, paddr++)
- rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
+ ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
}
ret = pblk_submit_io_sync(pblk, &rqd);
@@ -916,8 +922,11 @@ next_rq:
atomic_dec(&pblk->inflight_io);
- if (rqd.error)
+ if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) {
pblk_log_read_err(pblk, &rqd);
+ ret = -EIO;
+ goto free_rqd_dma;
+ }
emeta_buf += rq_len;
left_ppas -= rq_ppas;
@@ -1162,7 +1171,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
off = bit * geo->ws_opt;
bitmap_set(line->map_bitmap, off, lm->smeta_sec);
line->sec_in_line -= lm->smeta_sec;
- line->smeta_ssec = off;
line->cur_sec = off + lm->smeta_sec;
if (init && pblk_line_smeta_write(pblk, line, off)) {
@@ -1521,11 +1529,9 @@ void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa)
void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd)
{
- struct ppa_addr *ppa_list;
+ struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
int i;
- ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
-
for (i = 0; i < rqd->nr_ppas; i++)
pblk_ppa_to_line_put(pblk, ppa_list[i]);
}
@@ -1699,6 +1705,14 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_GC);
+ if (line->w_err_gc->has_gc_err) {
+ spin_unlock(&line->lock);
+ pblk_err(pblk, "line %d had errors during GC\n", line->id);
+ pblk_put_line_back(pblk, line);
+ line->w_err_gc->has_gc_err = 0;
+ return;
+ }
+
line->state = PBLK_LINESTATE_FREE;
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
line->state);
@@ -2023,7 +2037,7 @@ void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
struct ppa_addr ppa_l2p;
/* logic error: lba out-of-bounds. Ignore update */
- if (!(lba < pblk->rl.nr_secs)) {
+ if (!(lba < pblk->capacity)) {
WARN(1, "pblk: corrupted L2P map request\n");
return;
}
@@ -2063,7 +2077,7 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new,
#endif
/* logic error: lba out-of-bounds. Ignore update */
- if (!(lba < pblk->rl.nr_secs)) {
+ if (!(lba < pblk->capacity)) {
WARN(1, "pblk: corrupted L2P map request\n");
return 0;
}
@@ -2109,7 +2123,7 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
}
/* logic error: lba out-of-bounds. Ignore update */
- if (!(lba < pblk->rl.nr_secs)) {
+ if (!(lba < pblk->capacity)) {
WARN(1, "pblk: corrupted L2P map request\n");
return;
}
@@ -2135,8 +2149,8 @@ out:
spin_unlock(&pblk->trans_lock);
}
-void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
- sector_t blba, int nr_secs)
+int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
+ sector_t blba, int nr_secs, bool *from_cache)
{
int i;
@@ -2150,10 +2164,19 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) {
struct pblk_line *line = pblk_ppa_to_line(pblk, ppa);
+ if (i > 0 && *from_cache)
+ break;
+ *from_cache = false;
+
kref_get(&line->ref);
+ } else {
+ if (i > 0 && !*from_cache)
+ break;
+ *from_cache = true;
}
}
spin_unlock(&pblk->trans_lock);
+ return i;
}
void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
@@ -2167,7 +2190,7 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
lba = lba_list[i];
if (lba != ADDR_EMPTY) {
/* logic error: lba out-of-bounds. Ignore update */
- if (!(lba < pblk->rl.nr_secs)) {
+ if (!(lba < pblk->capacity)) {
WARN(1, "pblk: corrupted L2P map request\n");
continue;
}
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index 26a52ea7ec45..63ee205b41c4 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -59,24 +59,28 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
wake_up_process(gc->gc_writer_ts);
}
-static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
+void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list;
+ spin_lock(&l_mg->gc_lock);
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_GC);
line->state = PBLK_LINESTATE_CLOSED;
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
line->state);
+
+ /* We need to reset gc_group in order to ensure that
+ * pblk_line_gc_list will return proper move_list
+ * since right now current line is not on any of the
+ * gc lists.
+ */
+ line->gc_group = PBLK_LINEGC_NONE;
move_list = pblk_line_gc_list(pblk, line);
spin_unlock(&line->lock);
-
- if (move_list) {
- spin_lock(&l_mg->gc_lock);
- list_add_tail(&line->list, move_list);
- spin_unlock(&l_mg->gc_lock);
- }
+ list_add_tail(&line->list, move_list);
+ spin_unlock(&l_mg->gc_lock);
}
static void pblk_gc_line_ws(struct work_struct *work)
@@ -84,8 +88,6 @@ static void pblk_gc_line_ws(struct work_struct *work)
struct pblk_line_ws *gc_rq_ws = container_of(work,
struct pblk_line_ws, ws);
struct pblk *pblk = gc_rq_ws->pblk;
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
struct pblk_gc *gc = &pblk->gc;
struct pblk_line *line = gc_rq_ws->line;
struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;
@@ -93,18 +95,10 @@ static void pblk_gc_line_ws(struct work_struct *work)
up(&gc->gc_sem);
- gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
- if (!gc_rq->data) {
- pblk_err(pblk, "could not GC line:%d (%d/%d)\n",
- line->id, *line->vsc, gc_rq->nr_secs);
- goto out;
- }
-
/* Read from GC victim block */
ret = pblk_submit_read_gc(pblk, gc_rq);
if (ret) {
- pblk_err(pblk, "failed GC read in line:%d (err:%d)\n",
- line->id, ret);
+ line->w_err_gc->has_gc_err = 1;
goto out;
}
@@ -189,6 +183,8 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
struct pblk_line *line = line_ws->line;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
struct pblk_gc *gc = &pblk->gc;
struct pblk_line_ws *gc_rq_ws;
struct pblk_gc_rq *gc_rq;
@@ -247,9 +243,13 @@ next_rq:
gc_rq->nr_secs = nr_secs;
gc_rq->line = line;
+ gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
+ if (!gc_rq->data)
+ goto fail_free_gc_rq;
+
gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
if (!gc_rq_ws)
- goto fail_free_gc_rq;
+ goto fail_free_gc_data;
gc_rq_ws->pblk = pblk;
gc_rq_ws->line = line;
@@ -281,6 +281,8 @@ out:
return;
+fail_free_gc_data:
+ vfree(gc_rq->data);
fail_free_gc_rq:
kfree(gc_rq);
fail_free_lba_list:
@@ -290,8 +292,11 @@ fail_free_invalid_bitmap:
fail_free_ws:
kfree(line_ws);
+ /* Line goes back to closed state, so we cannot release additional
+ * reference for line, since we do that only when we want to do
+ * gc to free line state transition.
+ */
pblk_put_line_back(pblk, line);
- kref_put(&line->ref, pblk_line_put);
atomic_dec(&gc->read_inflight_gc);
pblk_err(pblk, "failed to GC line %d\n", line->id);
@@ -355,8 +360,13 @@ static int pblk_gc_read(struct pblk *pblk)
pblk_gc_kick(pblk);
- if (pblk_gc_line(pblk, line))
+ if (pblk_gc_line(pblk, line)) {
pblk_err(pblk, "failed to GC line %d\n", line->id);
+ /* rollback */
+ spin_lock(&gc->r_lock);
+ list_add_tail(&line->list, &gc->r_list);
+ spin_unlock(&gc->r_lock);
+ }
return 0;
}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 8b643d0bffae..b351c7f002de 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -47,33 +47,6 @@ static struct pblk_global_caches pblk_caches = {
struct bio_set pblk_bio_set;
-static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
- struct bio *bio)
-{
- int ret;
-
- /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
- * constraint. Writes can be of arbitrary size.
- */
- if (bio_data_dir(bio) == READ) {
- blk_queue_split(q, &bio);
- ret = pblk_submit_read(pblk, bio);
- if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
- bio_put(bio);
-
- return ret;
- }
-
- /* Prevent deadlock in the case of a modest LUN configuration and large
- * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
- * available for user I/O.
- */
- if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
- blk_queue_split(q, &bio);
-
- return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
-}
-
static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
{
struct pblk *pblk = q->queuedata;
@@ -86,13 +59,21 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
}
}
- switch (pblk_rw_io(q, pblk, bio)) {
- case NVM_IO_ERR:
- bio_io_error(bio);
- break;
- case NVM_IO_DONE:
- bio_endio(bio);
- break;
+ /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
+ * constraint. Writes can be of arbitrary size.
+ */
+ if (bio_data_dir(bio) == READ) {
+ blk_queue_split(q, &bio);
+ pblk_submit_read(pblk, bio);
+ } else {
+ /* Prevent deadlock in the case of a modest LUN configuration
+ * and large user I/Os. Unless stalled, the rate limiter
+ * leaves at least 256KB available for user I/O.
+ */
+ if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
+ blk_queue_split(q, &bio);
+
+ pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
}
return BLK_QC_T_NONE;
@@ -105,7 +86,7 @@ static size_t pblk_trans_map_size(struct pblk *pblk)
if (pblk->addrf_len < 32)
entry_size = 4;
- return entry_size * pblk->rl.nr_secs;
+ return entry_size * pblk->capacity;
}
#ifdef CONFIG_NVM_PBLK_DEBUG
@@ -164,13 +145,18 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
int ret = 0;
map_size = pblk_trans_map_size(pblk);
- pblk->trans_map = vmalloc(map_size);
- if (!pblk->trans_map)
+ pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN
+ | __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM,
+ PAGE_KERNEL);
+ if (!pblk->trans_map) {
+ pblk_err(pblk, "failed to allocate L2P (need %zu of memory)\n",
+ map_size);
return -ENOMEM;
+ }
pblk_ppa_set_empty(&ppa);
- for (i = 0; i < pblk->rl.nr_secs; i++)
+ for (i = 0; i < pblk->capacity; i++)
pblk_trans_map_set(pblk, i, ppa);
ret = pblk_l2p_recover(pblk, factory_init);
@@ -701,7 +687,6 @@ static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
* on user capacity consider only provisioned blocks
*/
pblk->rl.total_blocks = nr_free_chks;
- pblk->rl.nr_secs = nr_free_chks * geo->clba;
/* Consider sectors used for metadata */
sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
@@ -1284,7 +1269,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
geo->all_luns, pblk->l_mg.nr_lines,
- (unsigned long long)pblk->rl.nr_secs,
+ (unsigned long long)pblk->capacity,
pblk->rwb.nr_entries);
wake_up_process(pblk->writer_ts);
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 7fbc99b60cac..5408e32b2f13 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -162,6 +162,7 @@ int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
*erase_ppa = ppa_list[i];
erase_ppa->a.blk = e_line->id;
+ erase_ppa->a.reserved = 0;
spin_unlock(&e_line->lock);
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 03c241b340ea..5abb1705b039 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -642,7 +642,7 @@ try:
* be directed to disk.
*/
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- struct ppa_addr ppa, int bio_iter, bool advanced_bio)
+ struct ppa_addr ppa)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
struct pblk_rb_entry *entry;
@@ -673,15 +673,6 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
ret = 0;
goto out;
}
-
- /* Only advance the bio if it hasn't been advanced already. If advanced,
- * this bio is at least a partial bio (i.e., it has partially been
- * filled with data from the cache). If part of the data resides on the
- * media, we will read later on
- */
- if (unlikely(!advanced_bio))
- bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE);
-
data = bio_data(bio);
memcpy(data, entry->data, rb->seg_size);
@@ -799,8 +790,8 @@ int pblk_rb_tear_down_check(struct pblk_rb *rb)
}
out:
- spin_unlock(&rb->w_lock);
spin_unlock_irq(&rb->s_lock);
+ spin_unlock(&rb->w_lock);
return ret;
}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 0b7d5fb4548d..d98ea392fe33 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -26,8 +26,7 @@
* issued.
*/
static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
- sector_t lba, struct ppa_addr ppa,
- int bio_iter, bool advanced_bio)
+ sector_t lba, struct ppa_addr ppa)
{
#ifdef CONFIG_NVM_PBLK_DEBUG
/* Callers must ensure that the ppa points to a cache address */
@@ -35,73 +34,75 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
BUG_ON(!pblk_addr_in_cache(ppa));
#endif
- return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa,
- bio_iter, advanced_bio);
+ return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa);
}
-static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
+static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct bio *bio, sector_t blba,
- unsigned long *read_bitmap)
+ bool *from_cache)
{
void *meta_list = rqd->meta_list;
- struct ppa_addr ppas[NVM_MAX_VLBA];
- int nr_secs = rqd->nr_ppas;
- bool advanced_bio = false;
- int i, j = 0;
+ int nr_secs, i;
- pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs);
+retry:
+ nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas,
+ from_cache);
+
+ if (!*from_cache)
+ goto end;
for (i = 0; i < nr_secs; i++) {
- struct ppa_addr p = ppas[i];
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
sector_t lba = blba + i;
-retry:
- if (pblk_ppa_empty(p)) {
+ if (pblk_ppa_empty(rqd->ppa_list[i])) {
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
- WARN_ON(test_and_set_bit(i, read_bitmap));
meta->lba = addr_empty;
-
- if (unlikely(!advanced_bio)) {
- bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE);
- advanced_bio = true;
+ } else if (pblk_addr_in_cache(rqd->ppa_list[i])) {
+ /*
+ * Try to rea