summary refs log tree commit diff stats
path: root/drivers/md/dm-integrity.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-integrity.c')
-rw-r--r-- drivers/md/dm-integrity.c 3238
1 files changed, 3238 insertions, 0 deletions
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
new file mode 100644
index 000000000000..c7f7c8d76576
--- /dev/null
+++ b/drivers/md/dm-integrity.c
@@ -0,0 +1,3238 @@
+/*
+ * Copyright (C) 2016-2017 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2016-2017 Milan Broz
+ * Copyright (C) 2016-2017 Mikulas Patocka
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/module.h>
+#include <linux/device-mapper.h>
+#include <linux/dm-io.h>
+#include <linux/vmalloc.h>
+#include <linux/sort.h>
+#include <linux/rbtree.h>
+#include <linux/delay.h>
+#include <linux/random.h>
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
+#include <linux/async_tx.h>
+#include "dm-bufio.h"
+
+/* Log prefix; presumably picked up by the DM logging macros such as DMERR() - TODO confirm. */
+#define DM_MSG_PREFIX "integrity"
+
+/*
+ * Defaults and limits for the target's tunables.
+ * NOTE(review): the names suggest these back optional constructor
+ * arguments - confirm against the constructor.
+ */
+#define DEFAULT_INTERLEAVE_SECTORS 32768
+#define DEFAULT_JOURNAL_SIZE_FACTOR 7
+#define DEFAULT_BUFFER_SECTORS 128
+#define DEFAULT_JOURNAL_WATERMARK 50
+#define DEFAULT_SYNC_MSEC 10000
+#define DEFAULT_MAX_JOURNAL_SECTORS 131072
+#define MIN_LOG2_INTERLEAVE_SECTORS 3
+#define MAX_LOG2_INTERLEAVE_SECTORS 31
+#define METADATA_WORKQUEUE_MAX_ACTIVE 16
+
+/*
+ * Warning - DEBUG_PRINT prints security-sensitive data to the log,
+ * so it should not be enabled in the official kernel
+ */
+//#define DEBUG_PRINT
+//#define INTERNAL_VERIFY
+
+/*
+ * On disk structures
+ */
+
+/* Presumably the expected content of struct superblock's magic[] field - TODO confirm. */
+#define SB_MAGIC "integrt"
+#define SB_VERSION 1
+/* Number of sectors transferred by sync_rw_sb(); the journal starts this many sectors after ic->start (see rw_journal()). */
+#define SB_SECTORS 8
+/* Upper bound used when sizing MAX_TAG_SIZE. */
+#define MAX_SECTORS_PER_BLOCK 8
+
+/*
+ * On-disk superblock. The in-memory copy (ic->sb) is transferred as
+ * SB_SECTORS sectors at device offset ic->start by sync_rw_sb().
+ */
+struct superblock {
+ __u8 magic[8];
+ __u8 version;
+ /* shift used to split a data sector into (area, offset), see get_area_and_offset() */
+ __u8 log2_interleave_sectors;
+ __u16 integrity_tag_size;
+ __u32 journal_sections;
+ __u64 provided_data_sectors; /* userspace uses this value */
+ /* bit mask of SB_FLAG_* values */
+ __u32 flags;
+ /* shift used by sector_to_block() to convert sectors to blocks */
+ __u8 log2_sectors_per_block;
+};
+
+/* Bit in superblock.flags. */
+#define SB_FLAG_HAVE_JOURNAL_MAC 0x1
+
+#define JOURNAL_ENTRY_ROUNDUP 8
+
+/* Type of the commit id stored at the end of every journal sector (struct journal_sector). */
+typedef __u64 commit_id_t;
+/* Number of MAC bytes carried by each journal sector (journal_sector.mac). */
+#define JOURNAL_MAC_PER_SECTOR 8
+
+/*
+ * Header of one journal entry.
+ *
+ * The sector number can be accessed either as one 64-bit value or as two
+ * 32-bit halves; the high half doubles as a state marker (see the
+ * journal_entry_is_unused() / journal_entry_is_inprogress() macros).
+ *
+ * The header is followed by a variable-length tail: last_bytes[] holds
+ * ic->sectors_per_block elements and the tag bytes start right behind
+ * them (see journal_entry_tag()).  The tail is declared as a C99 flexible
+ * array member instead of a zero-length array; sizeof() and the member
+ * offsets are unchanged.
+ */
+struct journal_entry {
+	union {
+		struct {
+			__u32 sector_lo;
+			__u32 sector_hi;
+		} s;
+		__u64 sector;
+	} u;
+	commit_id_t last_bytes[];
+	/* __u8 tag[0]; */
+};
+
+/* Address of the tag bytes of entry @je: they follow the (ic)->sectors_per_block elements of last_bytes[]. */
+#define journal_entry_tag(ic, je) ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block])
+
+/*
+ * Accessors for the sector number of a journal entry.
+ *
+ * The setters issue smp_wmb() before the store that touches sector_hi
+ * (the whole 64-bit value on 64-bit builds), so that earlier writes are
+ * ordered before that store. Three variants exist: 64-bit longs, 32-bit
+ * longs with CONFIG_LBDAF, and 32-bit longs without it (the high half is
+ * then always written as 0 and only the low half is read back).
+ */
+#if BITS_PER_LONG == 64
+#define journal_entry_set_sector(je, x) do { smp_wmb(); ACCESS_ONCE((je)->u.sector) = cpu_to_le64(x); } while (0)
+#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
+#elif defined(CONFIG_LBDAF)
+#define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); ACCESS_ONCE((je)->u.s.sector_hi) = cpu_to_le32((x) >> 32); } while (0)
+#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
+#else
+#define journal_entry_set_sector(je, x) do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); ACCESS_ONCE((je)->u.s.sector_hi) = cpu_to_le32(0); } while (0)
+#define journal_entry_get_sector(je) le32_to_cpu((je)->u.s.sector_lo)
+#endif
+/* Entry states are encoded in sector_hi: -1 marks an unused entry, -2 an entry that is in progress. */
+#define journal_entry_is_unused(je) ((je)->u.s.sector_hi == cpu_to_le32(-1))
+#define journal_entry_set_unused(je) do { ((je)->u.s.sector_hi = cpu_to_le32(-1)); } while (0)
+#define journal_entry_is_inprogress(je) ((je)->u.s.sector_hi == cpu_to_le32(-2))
+#define journal_entry_set_inprogress(je) do { ((je)->u.s.sector_hi = cpu_to_le32(-2)); } while (0)
+
+/* Number of sectors at the start of each journal section that precede the data area (see access_journal_data()). */
+#define JOURNAL_BLOCK_SECTORS 8
+/* Bytes of a sector that remain after the trailing commit id. */
+#define JOURNAL_SECTOR_DATA ((1 << SECTOR_SHIFT) - sizeof(commit_id_t))
+/* Size of a whole section MAC: JOURNAL_MAC_PER_SECTOR bytes in each of the JOURNAL_BLOCK_SECTORS sectors. */
+#define JOURNAL_MAC_SIZE (JOURNAL_MAC_PER_SECTOR * JOURNAL_BLOCK_SECTORS)
+
+/*
+ * Layout of one journal sector: entry bytes, then a slice of the section
+ * MAC, then the commit id. The three members add up to
+ * JOURNAL_SECTOR_DATA + sizeof(commit_id_t), i.e. 1 << SECTOR_SHIFT bytes.
+ */
+struct journal_sector {
+ __u8 entries[JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR];
+ /* this sector's JOURNAL_MAC_PER_SECTOR bytes of the section MAC, see rw_section_mac() */
+ __u8 mac[JOURNAL_MAC_PER_SECTOR];
+ commit_id_t commit_id;
+};
+
+/*
+ * Size of journal_sector.entries minus the size of a journal entry header
+ * that carries MAX_SECTORS_PER_BLOCK last_bytes elements.
+ */
+#define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK]))
+
+#define METADATA_PADDING_SECTORS 8
+
+/* Number of commit ids (ic->commit_ids[]); commit sequence numbers wrap modulo this value. */
+#define N_COMMIT_IDS 4
+
+/* Step the commit sequence number @seq one position back, wrapping modulo N_COMMIT_IDS. */
+static unsigned char prev_commit_seq(unsigned char seq)
+{
+	int shifted = seq + N_COMMIT_IDS - 1;
+
+	return shifted % N_COMMIT_IDS;
+}
+
+/* Step the commit sequence number @seq one position forward, wrapping modulo N_COMMIT_IDS. */
+static unsigned char next_commit_seq(unsigned char seq)
+{
+	int shifted = seq + 1;
+
+	return shifted % N_COMMIT_IDS;
+}
+
+/*
+ * In-memory structures
+ */
+
+/*
+ * Node of the rb-tree rooted at ic->journal_tree_root, keyed by sector.
+ * The nodes live in the ic->journal_tree array; a node that is not linked
+ * into the tree has sector == (sector_t)-1 (see init_journal_node()).
+ */
+struct journal_node {
+ struct rb_node node;
+ sector_t sector;
+};
+
+/*
+ * Description of one crypto algorithm and its key.
+ * NOTE(review): presumably filled in from the target's arguments - the
+ * parsing code is outside this excerpt, confirm there.
+ */
+struct alg_spec {
+ char *alg_string;
+ char *key_string;
+ __u8 *key;
+ unsigned key_size;
+};
+
+/*
+ * In-memory state of one integrity device.
+ * NOTE(review): presumably one instance per dm target - confirm in the
+ * constructor, which is outside this excerpt.
+ */
+struct dm_integrity_c {
+ /* underlying device; all I/O below is issued to dev->bdev */
+ struct dm_dev *dev;
+ unsigned tag_size;
+ /* log2 of tag_size, or negative when a shift cannot be used (see get_metadata_sector_and_offset()) */
+ __s8 log2_tag_size;
+ /* sector offset on dev at which the superblock is placed (see sync_rw_sb()) */
+ sector_t start;
+ mempool_t *journal_io_mempool;
+ struct dm_io_client *io;
+ struct dm_bufio_client *bufio;
+ struct workqueue_struct *metadata_wq;
+ /* buffer holding the superblock */
+ struct superblock *sb;
+ unsigned journal_pages;
+ /* journal content in memory */
+ struct page_list *journal;
+ /* when set, journal I/O uses these pages instead of ->journal (see rw_journal()) */
+ struct page_list *journal_io;
+ /* when set, journal and journal_io are converted by xor-ing with these pages (see xor_journal()) */
+ struct page_list *journal_xor;
+
+ struct crypto_skcipher *journal_crypt;
+ /* per-section scatterlists and requests, indexed by section (see crypt_journal()) */
+ struct scatterlist **journal_scatterlist;
+ struct scatterlist **journal_io_scatterlist;
+ struct skcipher_request **sk_requests;
+
+ /* hash used for the per-section journal MAC; NULL disables it (see rw_section_mac()) */
+ struct crypto_shash *journal_mac;
+
+ /* array of journal nodes, indexed by journal entry position */
+ struct journal_node *journal_tree;
+ struct rb_root journal_tree_root;
+
+ sector_t provided_data_sectors;
+
+ unsigned short journal_entry_size;
+ unsigned char journal_entries_per_sector;
+ unsigned char journal_section_entries;
+ unsigned short journal_section_sectors;
+ unsigned journal_sections;
+ unsigned journal_entries;
+ sector_t device_sectors;
+ unsigned initial_sectors;
+ unsigned metadata_run;
+ /* log2 of metadata_run, or negative when a shift cannot be used */
+ __s8 log2_metadata_run;
+ __u8 log2_buffer_sectors;
+ __u8 sectors_per_block;
+
+ unsigned char mode;
+ bool suspending;
+
+ /* first error recorded by dm_integrity_io_error(); 0 while no error happened */
+ int failed;
+
+ struct crypto_shash *internal_hash;
+
+ /* these variables are locked with endio_wait.lock */
+ struct rb_root in_progress;
+ wait_queue_head_t endio_wait;
+ struct workqueue_struct *wait_wq;
+
+ unsigned char commit_seq;
+ commit_id_t commit_ids[N_COMMIT_IDS];
+
+ unsigned committed_section;
+ unsigned n_committed_sections;
+
+ unsigned uncommitted_section;
+ unsigned n_uncommitted_sections;
+
+ unsigned free_section;
+ unsigned char free_section_entry;
+ unsigned free_sectors;
+
+ unsigned free_sectors_threshold;
+
+ struct workqueue_struct *commit_wq;
+ struct work_struct commit_work;
+
+ struct workqueue_struct *writer_wq;
+ struct work_struct writer_work;
+
+ /* bios queued by submit_flush_bio() */
+ struct bio_list flush_bio_list;
+
+ /* delay, in jiffies, used by schedule_autocommit() when arming autocommit_timer */
+ unsigned long autocommit_jiffies;
+ struct timer_list autocommit_timer;
+ unsigned autocommit_msec;
+
+ wait_queue_head_t copy_to_journal_wait;
+
+ /* completed by complete_journal_encrypt() on -EINPROGRESS; do_crypt() waits on it after -EBUSY */
+ struct completion crypto_backoff;
+
+ bool journal_uptodate;
+ bool just_formatted;
+
+ struct alg_spec internal_hash_alg;
+ struct alg_spec journal_crypt_alg;
+ struct alg_spec journal_mac_alg;
+};
+
+/*
+ * A sector range [logical_sector, logical_sector + n_sectors) that can be
+ * linked into the ic->in_progress rb-tree (see add_new_range()).
+ */
+struct dm_integrity_range {
+ sector_t logical_sector;
+ unsigned n_sectors;
+ struct rb_node node;
+};
+
+/*
+ * Per-bio state. It is stored in the bio's per-bio data: the owning bio is
+ * recovered with dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)).
+ */
+struct dm_integrity_io {
+ struct work_struct work;
+
+ struct dm_integrity_c *ic;
+ bool write;
+ /* when set, a successfully finished bio is handed to submit_flush_bio() instead of being ended directly */
+ bool fua;
+
+ /* range currently being processed; removed from ic->in_progress by dec_in_flight() */
+ struct dm_integrity_range range;
+
+ sector_t metadata_block;
+ unsigned metadata_offset;
+
+ /* reference count; dec_in_flight() finishes the range when it drops to zero */
+ atomic_t in_flight;
+ /* error that dec_in_flight() copies into the bio if the bio has no error yet */
+ int bi_error;
+
+ struct completion *completion;
+
+ /* NOTE(review): presumably saved copies of the original bio fields - confirm where they are set */
+ struct block_device *orig_bi_bdev;
+ bio_end_io_t *orig_bi_end_io;
+ struct bio_integrity_payload *orig_bi_integrity;
+ struct bvec_iter orig_bi_iter;
+};
+
+/*
+ * Tracks a group of asynchronous journal operations: complete_journal_op()
+ * decrements in_flight and signals comp when it reaches zero.
+ */
+struct journal_completion {
+ struct dm_integrity_c *ic;
+ atomic_t in_flight;
+ struct completion comp;
+};
+
+/*
+ * A sector range paired with the journal_completion to signal.
+ * NOTE(review): presumably allocated from journal_io_cache - the users
+ * are outside this excerpt.
+ */
+struct journal_io {
+ struct dm_integrity_range range;
+ struct journal_completion *comp;
+};
+
+/* Slab cache; NOTE(review): presumably for struct journal_io objects - confirm at the creation site. */
+static struct kmem_cache *journal_io_cache;
+
+/* NOTE(review): presumably the minimum number of elements of ic->journal_io_mempool - confirm at the creation site. */
+#define JOURNAL_IO_MEMPOOL 32
+
+/*
+ * Debug helpers. With DEBUG_PRINT defined they log at KERN_DEBUG level,
+ * otherwise both macros expand to empty statements.
+ */
+#ifdef DEBUG_PRINT
+#define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__)
+/*
+ * Print the printf-style message @msg, followed by ':' and a hex dump of
+ * @len bytes at @bytes (when @len is non-zero), followed by a newline.
+ */
+static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...)
+{
+ va_list args;
+ va_start(args, msg);
+ vprintk(msg, args);
+ va_end(args);
+ if (len)
+ pr_cont(":");
+ while (len) {
+ pr_cont(" %02x", *bytes);
+ bytes++;
+ len--;
+ }
+ pr_cont("\n");
+}
+#define DEBUG_bytes(bytes, len, msg, ...) __DEBUG_bytes(bytes, len, KERN_DEBUG msg, ##__VA_ARGS__)
+#else
+#define DEBUG_print(x, ...) do { } while (0)
+#define DEBUG_bytes(bytes, len, msg, ...) do { } while (0)
+#endif
+
+/*
+ * DM Integrity profile; the protection itself is performed by the layer
+ * above (dm-crypt), so no generate/verify callbacks are provided here.
+ */
+static struct blk_integrity_profile dm_integrity_profile = {
+ .name = "DM-DIF-EXT-TAG",
+ .generate_fn = NULL,
+ .verify_fn = NULL,
+};
+
+/* Forward declarations of functions that are defined later in the file. */
+static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
+static void integrity_bio_wait(struct work_struct *w);
+static void dm_integrity_dtr(struct dm_target *ti);
+
+/*
+ * Record error @err in ic->failed if no error has been recorded yet.
+ * Only the caller whose compare-and-exchange finds the field still zero
+ * logs the message, naming the failed operation @msg.
+ */
+static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err)
+{
+	int already_failed = cmpxchg(&ic->failed, 0, err);
+
+	if (already_failed)
+		return;
+
+	DMERR("Error on %s: %d", msg, err);
+}
+
+/* Return the error recorded in ic->failed (0 if none), read once via ACCESS_ONCE(). */
+static int dm_integrity_failed(struct dm_integrity_c *ic)
+{
+	int recorded = ACCESS_ONCE(ic->failed);
+
+	return recorded;
+}
+
+/*
+ * Compute the commit id for position (@i, @j) under commit sequence @seq.
+ *
+ * The base id ic->commit_ids[@seq] is xor-ed with a little-endian value
+ * built from the section (@i, upper 32 bits) and the sector (@j), so that
+ * a piece of journal written at the wrong place is detected.
+ */
+static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i,
+					  unsigned j, unsigned char seq)
+{
+	__u64 position = ((__u64)i << 32) ^ j;
+
+	return ic->commit_ids[seq] ^ cpu_to_le64(position);
+}
+
+/*
+ * Split @data_sector into an area number and an offset inside that area.
+ * The area is @data_sector shifted right by the superblock's
+ * log2_interleave_sectors; the offset is the low bits below that shift.
+ */
+static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector,
+				sector_t *area, sector_t *offset)
+{
+	const __u8 shift = ic->sb->log2_interleave_sectors;
+	const unsigned low_bits = (1U << shift) - 1;
+
+	*area = data_sector >> shift;
+	*offset = (unsigned)data_sector & low_bits;
+}
+
+/*
+ * Convert the sector count/number held in the lvalue @n to blocks, in
+ * place. BUGs if @n is not a multiple of (ic)->sectors_per_block.
+ * Note that @n is evaluated more than once.
+ */
+#define sector_to_block(ic, n) \
+do { \
+ BUG_ON((n) & (unsigned)((ic)->sectors_per_block - 1)); \
+ (n) >>= (ic)->sb->log2_sectors_per_block; \
+} while (0)
+
+/*
+ * Locate the tag of the block at (@area, @offset).
+ *
+ * Returns the index of the metadata buffer (a unit of
+ * 1 << log2_buffer_sectors sectors) and stores the byte position inside
+ * that buffer in *@metadata_offset. @offset is given in sectors and must
+ * be block aligned (enforced by sector_to_block()).
+ */
+static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area,
+					    sector_t offset, unsigned *metadata_offset)
+{
+	const unsigned buffer_shift = SECTOR_SHIFT + ic->log2_buffer_sectors;
+	const unsigned in_buffer_mask = (1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1;
+	__u64 buffer_index;
+
+	/* sectors that precede this area: data of earlier areas plus their metadata runs */
+	buffer_index = area << ic->sb->log2_interleave_sectors;
+	buffer_index += likely(ic->log2_metadata_run >= 0) ?
+			area << ic->log2_metadata_run :
+			area * ic->metadata_run;
+	buffer_index >>= ic->log2_buffer_sectors;
+
+	sector_to_block(ic, offset);
+
+	if (likely(ic->log2_tag_size >= 0)) {
+		/* power-of-two tag size: use shifts */
+		buffer_index += offset >> (buffer_shift - ic->log2_tag_size);
+		*metadata_offset = (offset << ic->log2_tag_size) & in_buffer_mask;
+	} else {
+		buffer_index += (__u64)offset * ic->tag_size >> buffer_shift;
+		*metadata_offset = (offset * ic->tag_size) & in_buffer_mask;
+	}
+
+	return buffer_index;
+}
+
+/*
+ * Translate (@area, @offset) into a sector number: the data of the
+ * preceding areas, plus (@area + 1) metadata runs, plus
+ * ic->initial_sectors, plus @offset.
+ */
+static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector_t offset)
+{
+	sector_t data_before = area << ic->sb->log2_interleave_sectors;
+	sector_t metadata_before;
+
+	if (likely(ic->log2_metadata_run >= 0))
+		metadata_before = (area + 1) << ic->log2_metadata_run;
+	else
+		metadata_before = (area + 1) * ic->metadata_run;
+
+	return data_before + metadata_before + ((sector_t)ic->initial_sectors + offset);
+}
+
+/*
+ * If the section number in *@sec_ptr is not below ic->journal_sections,
+ * reduce it by ic->journal_sections (a single wrap, not a full modulo).
+ */
+static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr)
+{
+	unsigned section = *sec_ptr;
+
+	if (likely(section < ic->journal_sections))
+		return;
+
+	*sec_ptr = section - ic->journal_sections;
+}
+
+/*
+ * Transfer the superblock buffer (ic->sb) to or from the device with
+ * dm_io(): SB_SECTORS sectors at offset ic->start of ic->dev.
+ *
+ * @op and @op_flags are passed through as the request operation and its
+ * flags. No completion callback is set (notify.fn is NULL).
+ * Returns the result of dm_io().
+ */
+static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags)
+{
+	struct dm_io_region where;
+	struct dm_io_request req;
+
+	where.bdev = ic->dev->bdev;
+	where.sector = ic->start;
+	where.count = SB_SECTORS;
+
+	req.client = ic->io;
+	req.bi_op = op;
+	req.bi_op_flags = op_flags;
+	req.mem.type = DM_IO_KMEM;
+	req.mem.ptr.addr = ic->sb;
+	req.notify.fn = NULL;
+
+	return dm_io(&req, 1, &where, NULL);
+}
+
+/*
+ * Bounds check for journal accesses. Compiled in only with
+ * CONFIG_DM_DEBUG or INTERNAL_VERIFY; otherwise the function is empty.
+ *
+ * @section must be below ic->journal_sections. @offset must be below the
+ * number of entries per section when @e is true, or below the number of
+ * sectors per section when @e is false. A violation is reported, naming
+ * @function, and then BUG() is hit.
+ */
+static void access_journal_check(struct dm_integrity_c *ic, unsigned section, unsigned offset,
+				 bool e, const char *function)
+{
+#if defined(CONFIG_DM_DEBUG) || defined(INTERNAL_VERIFY)
+	unsigned limit;
+
+	if (e)
+		limit = ic->journal_section_entries;
+	else
+		limit = ic->journal_section_sectors;
+
+	if (likely(section < ic->journal_sections) && likely(offset < limit))
+		return;
+
+	printk(KERN_CRIT "%s: invalid access at (%u,%u), limit (%u,%u)\n",
+	       function, section, offset, ic->journal_sections, limit);
+	BUG();
+#endif
+}
+
+/*
+ * Translate sector @offset of journal section @section into a position in
+ * a journal page list: *@pl_index receives the page index and
+ * *@pl_offset the byte offset inside that page.
+ */
+static void page_list_location(struct dm_integrity_c *ic, unsigned section, unsigned offset,
+			       unsigned *pl_index, unsigned *pl_offset)
+{
+	unsigned journal_sector;
+
+	access_journal_check(ic, section, offset, false, "page_list_location");
+
+	journal_sector = offset + section * ic->journal_section_sectors;
+
+	*pl_index = journal_sector >> (PAGE_SHIFT - SECTOR_SHIFT);
+	*pl_offset = (journal_sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
+}
+
+/*
+ * Return the address of sector @offset of section @section inside the
+ * page list @pl. If @n_sectors is not NULL, it receives the number of
+ * sectors from that position up to the end of the containing page.
+ */
+static struct journal_sector *access_page_list(struct dm_integrity_c *ic, struct page_list *pl,
+					       unsigned section, unsigned offset, unsigned *n_sectors)
+{
+	unsigned page_nr, in_page;
+	char *page_va;
+
+	page_list_location(ic, section, offset, &page_nr, &in_page);
+
+	if (n_sectors != NULL)
+		*n_sectors = (PAGE_SIZE - in_page) >> SECTOR_SHIFT;
+
+	page_va = lowmem_page_address(pl[page_nr].page);
+	page_va += in_page;
+
+	return (struct journal_sector *)page_va;
+}
+
+/* Return the address of sector @offset of section @section in the in-memory journal (ic->journal). */
+static struct journal_sector *access_journal(struct dm_integrity_c *ic, unsigned section, unsigned offset)
+{
+	struct page_list *journal = ic->journal;
+
+	return access_page_list(ic, journal, section, offset, NULL);
+}
+
+/*
+ * Return journal entry number @n of section @section.
+ *
+ * Entries are spread round-robin over the first JOURNAL_BLOCK_SECTORS
+ * sectors of the section: entry @n lives in sector
+ * @n % JOURNAL_BLOCK_SECTORS, at slot @n / JOURNAL_BLOCK_SECTORS, each
+ * slot being ic->journal_entry_size bytes.
+ */
+static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, unsigned section, unsigned n)
+{
+	struct journal_sector *sector_va;
+	unsigned slot_bytes;
+
+	access_journal_check(ic, section, n, true, "access_journal_entry");
+
+	sector_va = access_journal(ic, section, n % JOURNAL_BLOCK_SECTORS);
+	slot_bytes = (n / JOURNAL_BLOCK_SECTORS) * ic->journal_entry_size;
+
+	return (struct journal_entry *)((char *)sector_va + slot_bytes);
+}
+
+/*
+ * Return the address of the data that belongs to entry @n of section
+ * @section. The data area starts after the first JOURNAL_BLOCK_SECTORS
+ * sectors of the section, and every entry takes one block
+ * (1 << log2_sectors_per_block sectors) in it.
+ */
+static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned section, unsigned n)
+{
+	unsigned data_sector = (n << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS;
+
+	access_journal_check(ic, section, data_sector, false, "access_journal_data");
+
+	return access_journal(ic, section, data_sector);
+}
+
+/*
+ * Compute the MAC of journal section @section into @result.
+ *
+ * The hash ic->journal_mac is run over the sector field of every entry of
+ * the section. @result always receives exactly JOURNAL_MAC_SIZE bytes: a
+ * shorter digest is padded with zeros, a longer one is truncated. On any
+ * crypto error the error is recorded with dm_integrity_io_error() and
+ * @result is filled with zeros.
+ */
+static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result[JOURNAL_MAC_SIZE])
+{
+ SHASH_DESC_ON_STACK(desc, ic->journal_mac);
+ int r;
+ unsigned j, size;
+
+ desc->tfm = ic->journal_mac;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ r = crypto_shash_init(desc);
+ if (unlikely(r)) {
+ dm_integrity_io_error(ic, "crypto_shash_init", r);
+ goto err;
+ }
+
+ /* feed the sector number of every entry of the section into the hash */
+ for (j = 0; j < ic->journal_section_entries; j++) {
+ struct journal_entry *je = access_journal_entry(ic, section, j);
+ r = crypto_shash_update(desc, (__u8 *)&je->u.sector, sizeof je->u.sector);
+ if (unlikely(r)) {
+ dm_integrity_io_error(ic, "crypto_shash_update", r);
+ goto err;
+ }
+ }
+
+ size = crypto_shash_digestsize(ic->journal_mac);
+
+ if (likely(size <= JOURNAL_MAC_SIZE)) {
+ /* digest fits: write it directly and zero the rest of the result */
+ r = crypto_shash_final(desc, result);
+ if (unlikely(r)) {
+ dm_integrity_io_error(ic, "crypto_shash_final", r);
+ goto err;
+ }
+ memset(result + size, 0, JOURNAL_MAC_SIZE - size);
+ } else {
+ /* digest is larger than the result: use a temporary buffer and keep the first JOURNAL_MAC_SIZE bytes */
+ __u8 digest[size];
+ r = crypto_shash_final(desc, digest);
+ if (unlikely(r)) {
+ dm_integrity_io_error(ic, "crypto_shash_final", r);
+ goto err;
+ }
+ memcpy(result, digest, JOURNAL_MAC_SIZE);
+ }
+
+ return;
+err:
+ memset(result, 0, JOURNAL_MAC_SIZE);
+}
+
+/*
+ * Store (@wr true) or verify (@wr false) the MAC of journal section
+ * @section. Does nothing when no journal MAC is configured.
+ *
+ * The JOURNAL_MAC_SIZE bytes of the MAC are spread over the mac fields of
+ * the first JOURNAL_BLOCK_SECTORS sectors, JOURNAL_MAC_PER_SECTOR bytes
+ * each. A mismatch on verification is recorded as -EILSEQ.
+ */
+static void rw_section_mac(struct dm_integrity_c *ic, unsigned section, bool wr)
+{
+	__u8 mac[JOURNAL_MAC_SIZE];
+	unsigned s;
+
+	if (!ic->journal_mac)
+		return;
+
+	section_mac(ic, section, mac);
+
+	for (s = 0; s < JOURNAL_BLOCK_SECTORS; s++) {
+		struct journal_sector *js = access_journal(ic, section, s);
+		__u8 *slice = mac + (s * JOURNAL_MAC_PER_SECTOR);
+
+		if (likely(wr)) {
+			memcpy(&js->mac, slice, JOURNAL_MAC_PER_SECTOR);
+			continue;
+		}
+
+		if (memcmp(&js->mac, slice, JOURNAL_MAC_PER_SECTOR))
+			dm_integrity_io_error(ic, "journal mac", -EILSEQ);
+	}
+}
+
+/*
+ * Drop one reference on the journal_completion passed as @context and
+ * signal its completion when the last reference is gone. BUGs if the
+ * counter is already zero on entry.
+ */
+static void complete_journal_op(void *context)
+{
+	struct journal_completion *jc = context;
+
+	BUG_ON(!atomic_read(&jc->in_flight));
+
+	if (unlikely(!atomic_dec_and_test(&jc->in_flight)))
+		return;
+
+	complete(&jc->comp);
+}
+
+/*
+ * Convert @n_sections journal sections, starting at @section, by xor-ing
+ * them with the pages of ic->journal_xor.
+ *
+ * With @encrypt true the source is ic->journal and the target is
+ * ic->journal_io (and the section MACs are written first); with @encrypt
+ * false the direction is reversed. One async_xor() is issued per page;
+ * @comp->in_flight is raised by the number of pages up front and every
+ * finished xor calls complete_journal_op() on @comp.
+ */
+static void xor_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
+ unsigned n_sections, struct journal_completion *comp)
+{
+ struct async_submit_ctl submit;
+ size_t n_bytes = (size_t)(n_sections * ic->journal_section_sectors) << SECTOR_SHIFT;
+ unsigned pl_index, pl_offset, section_index;
+ struct page_list *source_pl, *target_pl;
+
+ if (likely(encrypt)) {
+ source_pl = ic->journal;
+ target_pl = ic->journal_io;
+ } else {
+ source_pl = ic->journal_io;
+ target_pl = ic->journal;
+ }
+
+ page_list_location(ic, section, 0, &pl_index, &pl_offset);
+
+ /* one reference per page that the byte range touches */
+ atomic_add(roundup(pl_offset + n_bytes, PAGE_SIZE) >> PAGE_SHIFT, &comp->in_flight);
+
+ init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, complete_journal_op, comp, NULL);
+
+ /* section_index is the page in which the next not-yet-handled section starts */
+ section_index = pl_index;
+
+ do {
+ size_t this_step;
+ struct page *src_pages[2];
+ struct page *dst_page;
+
+ /*
+  * For every section that starts in the current page: update its MAC
+  * (encrypt only) before the page is xor-ed, then look up the page in
+  * which the following section starts.
+  */
+ while (unlikely(pl_index == section_index)) {
+ unsigned dummy;
+ if (likely(encrypt))
+ rw_section_mac(ic, section, true);
+ section++;
+ n_sections--;
+ if (!n_sections)
+ break;
+ page_list_location(ic, section, 0, &section_index, &dummy);
+ }
+
+ /* xor the rest of this page, or less if the range ends inside it */
+ this_step = min(n_bytes, (size_t)PAGE_SIZE - pl_offset);
+ dst_page = target_pl[pl_index].page;
+ src_pages[0] = source_pl[pl_index].page;
+ src_pages[1] = ic->journal_xor[pl_index].page;
+
+ async_xor(dst_page, src_pages, pl_offset, 2, this_step, &submit);
+
+ pl_index++;
+ pl_offset = 0;
+ n_bytes -= this_step;
+ } while (n_bytes);
+
+ /* all sections must have been visited by the inner loop */
+ BUG_ON(n_sections);
+
+ async_tx_issue_pending_all();
+}
+
+/*
+ * Completion callback of the asynchronous cipher requests started by
+ * do_crypt(); req->data is the journal_completion.
+ *
+ * -EINPROGRESS only wakes a submitter waiting on ic->crypto_backoff and
+ * does not finish the operation. Any other non-zero @err is recorded as
+ * an error; in that case, and on success, one reference on the
+ * completion is dropped.
+ */
+static void complete_journal_encrypt(struct crypto_async_request *req, int err)
+{
+	struct journal_completion *jc = req->data;
+
+	if (unlikely(err == -EINPROGRESS)) {
+		complete(&jc->ic->crypto_backoff);
+		return;
+	}
+
+	if (unlikely(err))
+		dm_integrity_io_error(jc->ic, "asynchronous encrypt", err);
+
+	complete_journal_op(jc);
+}
+
+/*
+ * Submit cipher request @req for encryption (@encrypt true) or
+ * decryption, with complete_journal_encrypt() as callback and @comp as
+ * its data.
+ *
+ * Returns true when the request was accepted for asynchronous processing
+ * (-EINPROGRESS, or -EBUSY after waiting on ic->crypto_backoff), so the
+ * callback will run later. Returns false when the request finished
+ * synchronously, either successfully or with an error that has been
+ * recorded via dm_integrity_io_error().
+ */
+static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp)
+{
+	struct dm_integrity_c *ic;
+	int status;
+
+	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+				      complete_journal_encrypt, comp);
+
+	status = likely(encrypt) ? crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req);
+
+	switch (status) {
+	case 0:
+		return false;
+	case -EINPROGRESS:
+		return true;
+	case -EBUSY:
+		ic = comp->ic;
+		wait_for_completion(&ic->crypto_backoff);
+		reinit_completion(&ic->crypto_backoff);
+		return true;
+	default:
+		dm_integrity_io_error(comp->ic, "encrypt", status);
+		return false;
+	}
+}
+
+/*
+ * Encrypt (@encrypt true, ic->journal -> ic->journal_io) or decrypt
+ * (reverse direction) @n_sections journal sections starting at @section,
+ * one cipher request per section.
+ *
+ * @comp->in_flight is raised by 2 while requests are being submitted and
+ * by 1 more for every request that completes asynchronously; the two
+ * initial references are dropped at the end (one plain decrement, one via
+ * complete_journal_op()).
+ */
+static void crypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
+ unsigned n_sections, struct journal_completion *comp)
+{
+ struct scatterlist **source_sg;
+ struct scatterlist **target_sg;
+
+ atomic_add(2, &comp->in_flight);
+
+ if (likely(encrypt)) {
+ source_sg = ic->journal_scatterlist;
+ target_sg = ic->journal_io_scatterlist;
+ } else {
+ source_sg = ic->journal_io_scatterlist;
+ target_sg = ic->journal_scatterlist;
+ }
+
+ do {
+ struct skcipher_request *req;
+ unsigned ivsize;
+ char *iv;
+
+ /* the MAC is written into the plaintext section before it is encrypted */
+ if (likely(encrypt))
+ rw_section_mac(ic, section, true);
+
+ req = ic->sk_requests[section];
+ ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
+ iv = req->iv;
+
+ /*
+  * Copy ivsize bytes from the second half of the iv buffer to its start.
+  * NOTE(review): presumably this restores a saved copy of the IV - confirm
+  * where req->iv is allocated.
+  */
+ memcpy(iv, iv + ivsize, ivsize);
+
+ req->src = source_sg[section];
+ req->dst = target_sg[section];
+
+ /* an asynchronously running request holds its own reference */
+ if (unlikely(do_crypt(encrypt, req, comp)))
+ atomic_inc(&comp->in_flight);
+
+ section++;
+ n_sections--;
+ } while (n_sections);
+
+ atomic_dec(&comp->in_flight);
+ complete_journal_op(comp);
+}
+
+/*
+ * Encrypt or decrypt @n_sections journal sections starting at @section.
+ * Dispatches to xor_journal() when ic->journal_xor is set, and to
+ * crypt_journal() otherwise.
+ */
+static void encrypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned section,
+			    unsigned n_sections, struct journal_completion *comp)
+{
+	if (ic->journal_xor) {
+		xor_journal(ic, encrypt, section, n_sections, comp);
+		return;
+	}
+
+	crypt_journal(ic, encrypt, section, n_sections, comp);
+}
+
+/*
+ * dm_io notify callback for journal I/O; @context is the
+ * journal_completion. A non-zero @error is recorded as -EIO, then one
+ * reference on the completion is dropped.
+ */
+static void complete_journal_io(unsigned long error, void *context)
+{
+	struct journal_completion *jc = context;
+
+	if (unlikely(error))
+		dm_integrity_io_error(jc->ic, "writing journal", -EIO);
+
+	complete_journal_op(jc);
+}
+
+/*
+ * Read or write @n_sections journal sections starting at @section.
+ *
+ * The memory side is ic->journal_io when it exists, otherwise
+ * ic->journal; the device side starts SB_SECTORS sectors after ic->start.
+ * When @comp is not NULL, complete_journal_io() is registered as the
+ * notify callback with @comp as context; it is also called directly, with
+ * an error, if the device has already failed or dm_io() returns an error.
+ * When @comp is NULL no callback is set.
+ */
+static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section,
+ unsigned n_sections, struct journal_completion *comp)
+{
+ struct dm_io_request io_req;
+ struct dm_io_region io_loc;
+ unsigned sector, n_sectors, pl_index, pl_offset;
+ int r;
+
+ /* do not touch the device after an error, but still drop the caller's reference */
+ if (unlikely(dm_integrity_failed(ic))) {
+ if (comp)
+ complete_journal_io(-1UL, comp);
+ return;
+ }
+
+ sector = section * ic->journal_section_sectors;
+ n_sectors = n_sections * ic->journal_section_sectors;
+
+ /* position of the first sector inside the page list */
+ pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
+ pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
+
+ io_req.bi_op = op;
+ io_req.bi_op_flags = op_flags;
+ io_req.mem.type = DM_IO_PAGE_LIST;
+ if (ic->journal_io)
+ io_req.mem.ptr.pl = &ic->journal_io[pl_index];
+ else
+ io_req.mem.ptr.pl = &ic->journal[pl_index];
+ io_req.mem.offset = pl_offset;
+ if (likely(comp != NULL)) {
+ io_req.notify.fn = complete_journal_io;
+ io_req.notify.context = comp;
+ } else {
+ io_req.notify.fn = NULL;
+ }
+ io_req.client = ic->io;
+ io_loc.bdev = ic->dev->bdev;
+ io_loc.sector = ic->start + SB_SECTORS + sector;
+ io_loc.count = n_sectors;
+
+ r = dm_io(&io_req, 1, &io_loc, NULL);
+ if (unlikely(r)) {
+ dm_integrity_io_error(ic, op == REQ_OP_READ ? "reading journal" : "writing journal", r);
+ if (comp) {
+ WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
+ complete_journal_io(-1UL, comp);
+ }
+ }
+}
+
+/*
+ * Write @commit_sections journal sections, starting at @commit_start, to
+ * the device with REQ_FUA and wait until the writes have finished.
+ *
+ * If the range does not run past the end of the journal it is written
+ * with one rw_journal() call; otherwise it is split into the part up to
+ * the end of the journal and the part that wraps around to section 0.
+ * Before a part is written it is either encrypted (when ic->journal_io
+ * exists) or only gets its section MACs updated.
+ */
+static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsigned commit_sections)
+{
+ struct journal_completion io_comp;
+ struct journal_completion crypt_comp_1;
+ struct journal_completion crypt_comp_2;
+ unsigned i;
+
+ io_comp.ic = ic;
+ io_comp.comp = COMPLETION_INITIALIZER_ONSTACK(io_comp.comp);
+
+ if (commit_start + commit_sections <= ic->journal_sections) {
+ /* contiguous range: a single write, hence one reference */
+ io_comp.in_flight = (atomic_t)ATOMIC_INIT(1);
+ if (ic->journal_io) {
+ crypt_comp_1.ic = ic;
+ crypt_comp_1.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_1.comp);
+ crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
+ encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1);
+ wait_for_completion_io(&crypt_comp_1.comp);
+ } else {
+ for (i = 0; i < commit_sections; i++)
+ rw_section_mac(ic, commit_start + i, true);
+ }
+ rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, commit_sections, &io_comp);
+ } else {
+ /* wrapping range: two writes, hence two references */
+ unsigned to_end;
+ io_comp.in_flight = (atomic_t)ATOMIC_INIT(2);
+ to_end = ic->journal_sections - commit_start;
+ if (ic->journal_io) {
+ crypt_comp_1.ic = ic;
+ crypt_comp_1.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_1.comp);
+ crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
+ encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1);
+ if (try_wait_for_completion(&crypt_comp_1.comp)) {
+ /* first part is already encrypted: start writing it, then reuse crypt_comp_1 for the second part */
+ rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp);
+ crypt_comp_1.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_1.comp);
+ crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
+ encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1);
+ wait_for_completion_io(&crypt_comp_1.comp);
+ } else {
+ /* first part still being encrypted: start the second part with its own completion, then wait for both in order */
+ crypt_comp_2.ic = ic;
+ crypt_comp_2.comp = COMPLETION_INITIALIZER_ONSTACK(crypt_comp_2.comp);
+ crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0);
+ encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2);
+ wait_for_completion_io(&crypt_comp_1.comp);
+ rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp);
+ wait_for_completion_io(&crypt_comp_2.comp);
+ }
+ } else {
+ for (i = 0; i < to_end; i++)
+ rw_section_mac(ic, commit_start + i, true);
+ rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp);
+ for (i = 0; i < commit_sections - to_end; i++)
+ rw_section_mac(ic, i, true);
+ }
+ /* second part: the sections that wrapped around to the start of the journal */
+ rw_journal(ic, REQ_OP_WRITE, REQ_FUA, 0, commit_sections - to_end, &io_comp);
+ }
+
+ wait_for_completion_io(&io_comp.comp);
+}
+
+/*
+ * Write @n_sectors sectors from the data area of journal section
+ * @section, starting at data sector @offset, to device sector
+ * ic->start + @target.
+ *
+ * @fn is installed as the dm_io notify callback with @data as context.
+ * It is called directly with an error value if the device has already
+ * failed or if dm_io() returns an error. @target, @n_sectors and @offset
+ * must all be block aligned.
+ */
+static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsigned offset,
+ unsigned n_sectors, sector_t target, io_notify_fn fn, void *data)
+{
+ struct dm_io_request io_req;
+ struct dm_io_region io_loc;
+ int r;
+ unsigned sector, pl_index, pl_offset;
+
+ BUG_ON((target | n_sectors | offset) & (unsigned)(ic->sectors_per_block - 1));
+
+ if (unlikely(dm_integrity_failed(ic))) {
+ fn(-1UL, data);
+ return;
+ }
+
+ /* the data area follows the first JOURNAL_BLOCK_SECTORS sectors of the section */
+ sector = section * ic->journal_section_sectors + JOURNAL_BLOCK_SECTORS + offset;
+
+ pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
+ pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
+
+ io_req.bi_op = REQ_OP_WRITE;
+ io_req.bi_op_flags = 0;
+ io_req.mem.type = DM_IO_PAGE_LIST;
+ io_req.mem.ptr.pl = &ic->journal[pl_index];
+ io_req.mem.offset = pl_offset;
+ io_req.notify.fn = fn;
+ io_req.notify.context = data;
+ io_req.client = ic->io;
+ io_loc.bdev = ic->dev->bdev;
+ io_loc.sector = ic->start + target;
+ io_loc.count = n_sectors;
+
+ r = dm_io(&io_req, 1, &io_loc, NULL);
+ if (unlikely(r)) {
+ WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
+ fn(-1UL, data);
+ }
+}
+
+/*
+ * Try to insert @new_range into the ic->in_progress rb-tree.
+ *
+ * Returns true if the range was inserted, and false (leaving the tree
+ * unchanged) if it overlaps a range that is already in the tree. The
+ * range must be block aligned.
+ */
+static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
+{
+	struct rb_node **link = &ic->in_progress.rb_node;
+	struct rb_node *parent = NULL;
+	const sector_t new_start = new_range->logical_sector;
+	const sector_t new_end = new_start + new_range->n_sectors;
+
+	BUG_ON((new_start | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1));
+
+	while (*link) {
+		struct dm_integrity_range *cur = container_of(*link, struct dm_integrity_range, node);
+
+		parent = *link;
+
+		if (new_end <= cur->logical_sector)
+			link = &cur->node.rb_left;	/* entirely before cur */
+		else if (new_start >= cur->logical_sector + cur->n_sectors)
+			link = &cur->node.rb_right;	/* entirely after cur */
+		else
+			return false;			/* overlap */
+	}
+
+	rb_link_node(&new_range->node, parent, link);
+	rb_insert_color(&new_range->node, &ic->in_progress);
+
+	return true;
+}
+
+/*
+ * Remove @range from the ic->in_progress tree and wake up the waiters on
+ * ic->endio_wait. The function takes no lock itself; remove_range() calls
+ * it with ic->endio_wait.lock held.
+ */
+static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range)
+{
+	struct rb_root *in_progress = &ic->in_progress;
+
+	rb_erase(&range->node, in_progress);
+	wake_up_locked(&ic->endio_wait);
+}
+
+/*
+ * Remove @range from the in-progress tree and wake up waiters, taking
+ * ic->endio_wait.lock with interrupts saved and disabled around the
+ * operation.
+ */
+static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range)
+{
+	spinlock_t *lock = &ic->endio_wait.lock;
+	unsigned long irq_state;
+
+	spin_lock_irqsave(lock, irq_state);
+	remove_range_unlocked(ic, range);
+	spin_unlock_irqrestore(lock, irq_state);
+}
+
+/* Put @node into the "not in the tree" state: no sector assigned and the rb node cleared. */
+static void init_journal_node(struct journal_node *node)
+{
+	node->sector = (sector_t)-1;
+	RB_CLEAR_NODE(&node->node);
+}
+
+/*
+ * Assign @sector to @node and insert the node into the journal tree.
+ * The node must not be in the tree already. A node whose sector equals
+ * that of an existing node is placed to the right of it, so among equal
+ * sectors the most recently added node comes last in tree order.
+ */
+static void add_journal_node(struct dm_integrity_c *ic, struct journal_node *node, sector_t sector)
+{
+	struct rb_node **slot = &ic->journal_tree_root.rb_node;
+	struct rb_node *parent = NULL;
+
+	node->sector = sector;
+	BUG_ON(!RB_EMPTY_NODE(&node->node));
+
+	while (*slot) {
+		struct journal_node *cur;
+
+		parent = *slot;
+		cur = container_of(parent, struct journal_node, node);
+		slot = (sector < cur->sector) ? &cur->node.rb_left : &cur->node.rb_right;
+	}
+
+	rb_link_node(&node->node, parent, slot);
+	rb_insert_color(&node->node, &ic->journal_tree_root);
+}
+
+/* Take @node, which must be in the journal tree, out of it and reset it to the unused state. */
+static void remove_journal_node(struct dm_integrity_c *ic, struct journal_node *node)
+{
+	struct rb_root *tree = &ic->journal_tree_root;
+
+	BUG_ON(RB_EMPTY_NODE(&node->node));
+
+	rb_erase(&node->node, tree);
+	init_journal_node(node);
+}
+
+/* Value returned by find_journal_node() when no node matches. */
+#define NOT_FOUND (-1U)
+
+/*
+ * Look up @sector in the journal tree.
+ *
+ * Returns the index (within ic->journal_tree) of a node with that sector,
+ * or NOT_FOUND. The descent continues to the right after a match, so with
+ * several equal nodes the last match met on the way down is returned.
+ * *@next_sector receives the smallest sector greater than @sector seen on
+ * the search path, or (sector_t)-1 if there was none.
+ */
+static unsigned find_journal_node(struct dm_integrity_c *ic, sector_t sector, sector_t *next_sector)
+{
+	struct rb_node *cursor;
+	unsigned match = NOT_FOUND;
+
+	*next_sector = (sector_t)-1;
+
+	for (cursor = ic->journal_tree_root.rb_node; cursor; ) {
+		struct journal_node *cur = container_of(cursor, struct journal_node, node);
+
+		if (sector < cur->sector) {
+			*next_sector = cur->sector;
+			cursor = cur->node.rb_left;
+			continue;
+		}
+
+		if (sector == cur->sector)
+			match = cur - ic->journal_tree;
+
+		cursor = cur->node.rb_right;
+	}
+
+	return match;
+}
+
+/*
+ * Check whether journal position @pos holds the last node in tree order
+ * for @sector: @pos must be a valid index, its node must be in the tree
+ * and carry @sector, and the next node in tree order (if any) must carry
+ * a different sector.
+ */
+static bool test_journal_node(struct dm_integrity_c *ic, unsigned pos, sector_t sector)
+{
+	struct journal_node *candidate;
+	struct rb_node *successor;
+
+	if (unlikely(pos >= ic->journal_entries))
+		return false;
+
+	candidate = &ic->journal_tree[pos];
+	if (unlikely(RB_EMPTY_NODE(&candidate->node)) || unlikely(candidate->sector != sector))
+		return false;
+
+	successor = rb_next(&candidate->node);
+	if (unlikely(!successor))
+		return true;
+
+	return container_of(successor, struct journal_node, node)->sector != sector;
+}
+
+/*
+ * Check whether the node following @node in tree order carries the same
+ * sector and lies in a committed section, i.e. in the window of
+ * ic->n_committed_sections sections starting at ic->committed_section,
+ * including the part of the window that wraps past the end of the journal.
+ * @node must be in the tree.
+ */
+static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_node *node)
+{
+	struct journal_node *successor;
+	struct rb_node *next;
+	unsigned section, window_end;
+
+	BUG_ON(RB_EMPTY_NODE(&node->node));
+
+	next = rb_next(&node->node);
+	if (unlikely(!next))
+		return false;
+
+	successor = container_of(next, struct journal_node, node);
+	if (successor->sector != node->sector)
+		return false;
+
+	/* the section of a node follows from its index in the journal_tree array */
+	section = (unsigned)(successor - ic->journal_tree) / ic->journal_section_entries;
+	window_end = ic->committed_section + ic->n_committed_sections;
+
+	if (section >= ic->committed_section && section < window_end)
+		return true;
+
+	/* the wrapped-around part of the window */
+	return section + ic->journal_sections < window_end;
+}
+
+/* Operations of dm_integrity_rw_tag(); any value other than TAG_READ and TAG_WRITE is handled as TAG_CMP. */
+#define TAG_READ 0
+#define TAG_WRITE 1
+#define TAG_CMP 2
+
+/*
+ * Read, write or compare @total_size bytes of tag data at the position
+ * (*@metadata_block, *@metadata_offset) of the metadata buffers.
+ *
+ * TAG_READ copies from the metadata into @tag, TAG_WRITE copies @tag into
+ * the metadata and marks the buffer dirty, TAG_CMP compares the two. The
+ * data may span several buffers; the position is advanced past the
+ * processed bytes.
+ *
+ * Returns 0 on success, the recorded device error or the dm_bufio_read()
+ * error (negative) on failure, and for a TAG_CMP mismatch the number of
+ * bytes that were left from the first differing byte onwards (positive).
+ */
+static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block,
+ unsigned *metadata_offset, unsigned total_size, int op)
+{
+ do {
+ unsigned char *data, *dp;
+ struct dm_buffer *b;
+ unsigned to_copy;
+ int r;
+
+ r = dm_integrity_failed(ic);
+ if (unlikely(r))
+ return r;
+
+ data = dm_bufio_read(ic->bufio, *metadata_block, &b);
+ if (unlikely(IS_ERR(data)))
+ return PTR_ERR(data);
+
+ /* process up to the end of this buffer, or less if the tag ends earlier */
+ to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
+ dp = data + *metadata_offset;
+ if (op == TAG_READ) {
+ memcpy(tag, dp, to_copy);
+ } else if (op == TAG_WRITE) {
+ memcpy(dp, tag, to_copy);
+ dm_bufio_mark_buffer_dirty(b);
+ } else {
+ /* e.g.: op == TAG_CMP */
+ if (unlikely(memcmp(dp, tag, to_copy))) {
+ unsigned i;
+
+ /* discount the leading bytes that still match */
+ for (i = 0; i < to_copy; i++) {
+ if (dp[i] != tag[i])
+ break;
+ total_size--;
+ }
+ dm_bufio_release(b);
+ return total_size;
+ }
+ }
+ dm_bufio_release(b);
+
+ tag += to_copy;
+ *metadata_offset += to_copy;
+ /* reached the end of the buffer: continue at the start of the next one */
+ if (unlikely(*metadata_offset == 1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
+ (*metadata_block)++;
+ *metadata_offset = 0;
+ }
+ total_size -= to_copy;
+ } while (unlikely(total_size));
+
+ return 0;
+}
+
+/* Write out the dirty metadata buffers of ic->bufio; a failure is recorded via dm_integrity_io_error(). */
+static void dm_integrity_flush_buffers(struct dm_integrity_c *ic)
+{
+	int err = dm_bufio_write_dirty_buffers(ic->bufio);
+
+	if (likely(!err))
+		return;
+
+	dm_integrity_io_error(ic, "writing tags", err);
+}
+
+/*
+ * Sleep until woken through ic->endio_wait.
+ *
+ * The caller must hold ic->endio_wait.lock with interrupts disabled: the
+ * lock is released with spin_unlock_irq() only while sleeping and is
+ * taken again before the function returns.
+ */
+static void sleep_on_endio_wait(struct dm_integrity_c *ic)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ /* queue ourselves and set the task state before dropping the lock */
+ __add_wait_queue(&ic->endio_wait, &wait);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&ic->endio_wait.lock);
+ io_schedule();
+ spin_lock_irq(&ic->endio_wait.lock);
+ __remove_wait_queue(&ic->endio_wait, &wait);
+}
+
+/*
+ * Timer callback; @data is the struct dm_integrity_c pointer. Queues the
+ * commit work on ic->commit_wq unless the device has failed.
+ */
+static void autocommit_fn(unsigned long data)
+{
+	struct dm_integrity_c *ic = (struct dm_integrity_c *)data;
+
+	if (unlikely(dm_integrity_failed(ic)))
+		return;
+
+	queue_work(ic->commit_wq, &ic->commit_work);
+}
+
+/*
+ * Arm the autocommit timer to fire ic->autocommit_jiffies from now,
+ * unless it is already pending (a pending timer is left untouched).
+ */
+static void schedule_autocommit(struct dm_integrity_c *ic)
+{
+	struct timer_list *timer = &ic->autocommit_timer;
+
+	if (timer_pending(timer))
+		return;
+
+	mod_timer(timer, jiffies + ic->autocommit_jiffies);
+}
+
+/*
+ * Append the bio that owns @dio to ic->flush_bio_list (under
+ * ic->endio_wait.lock) and queue the commit work.
+ */
+static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
+{
+	spinlock_t *lock = &ic->endio_wait.lock;
+	struct bio *owner;
+
+	spin_lock_irq(lock);
+	owner = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
+	bio_list_add(&ic->flush_bio_list, owner);
+	spin_unlock_irq(lock);
+
+	queue_work(ic->commit_wq, &ic->commit_work);
+}
+
+/*
+ * End @bio. If the device has a recorded error and the bio carries none
+ * yet, the device error is reported through the bio.
+ */
+static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
+{
+	int device_error = dm_integrity_failed(ic);
+
+	if (unlikely(device_error)) {
+		if (!bio->bi_error)
+			bio->bi_error = device_error;
+	}
+
+	bio_endio(bio);
+}
+
+/*
+ * Finish the bio that owns @dio. A FUA request that has succeeded so far
+ * on a device without a recorded error is handed to submit_flush_bio();
+ * every other bio is ended right away.
+ */
+static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
+{
+	struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
+
+	if (likely(!dio->fua) || unlikely(bio->bi_error) || unlikely(dm_integrity_failed(ic))) {
+		do_endio(ic, bio);
+		return;
+	}
+
+	submit_flush_bio(ic, dio);
+}
+
+/*
+ * Drop one reference on @dio. When the last reference is gone:
+ *  - the range is removed from the in-progress tree,
+ *  - for a write the autocommit timer is armed,
+ *  - an error stored in @dio is copied to the bio if it has none yet,
+ *  - if the bio is error free and longer than the range just processed,
+ *    the range and the bio are advanced and the remainder is queued on
+ *    ic->wait_wq (integrity_bio_wait); otherwise the bio is finished via
+ *    do_endio_flush().
+ */
+static void dec_in_flight(struct dm_integrity_io *dio)
+{
+	struct dm_integrity_c *ic;
+	struct bio *bio;
+
+	if (!atomic_dec_and_test(&dio->in_flight))
+		return;
+
+	ic = dio->ic;
+
+	remove_range(ic, &dio->range);
+
+	if (unlikely(dio->write))
+		schedule_autocommit(ic);
+
+	bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
+
+	if (unlikely(dio->bi_error) && !bio->bi_error)
+		bio->bi_error = dio->bi_error;
+
+	if (unlikely(bio->bi_error) || likely(bio_sectors(bio) == dio->range.n_sectors)) {
+		do_endio_flush(ic, dio);
+		return;
+	}
+
+	/* only part of the bio was covered: continue with the rest */
+	dio->range.logical_sector += dio->range.n_sectors;
+	bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
+	INIT_WORK(&dio->work, integrity_bio_wait);
+	queue_work(ic->wait_wq, &dio->work);
+}
+