Add support for sTec's pci-e flash card Kronos

Signed-off-by: Akhil Bhansali <abhansali@stec-inc.com> Signed-off-by: Ramprasad Chinthekindi <rchinthekindi@stec-inc.com> Reviewed-by: Jeff Moyer <jmoyer@redhat.com> Folded patch, contributions to clean up this driver from: Jens Axboe Dan Carpenter Andrew Morton Signed-off-by: Jens Axboe <axboe@kernel.dk>
author: Akhil Bhansali <abhansali@stec-inc.com> 2013-10-15 14:19:07 -0600
committer: Jens Axboe <axboe@kernel.dk> 2013-11-08 09:10:28 -0700
commit: e67f86b31ae5be8a88bec27b5ecb18dc2ffc9c56 (patch)
tree: 8e0cf5c5dd6a266edbce015ffc3e23b7c3e8cf37 /drivers/block/skd_main.c
parent: 0317cd6de852a70e0374e7eb40a013072274386f (diff)
1 files changed, 5817 insertions, 0 deletions
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
new file mode 100644
index 000000000000..3110f68ecedd
--- /dev/null
+++ b/drivers/block/skd_main.c
@@ -0,0 +1,5817 @@
+/* Copyright 2012 STEC, Inc.
+ *
+ * This file is licensed under the terms of the 3-clause
+ * BSD License (http://opensource.org/licenses/BSD-3-Clause)
+ * or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
+ * at your option. Both licenses are also available in the LICENSE file
+ * distributed with this project. This file may not be copied, modified,
+ * or distributed except in accordance with those terms.
+ * Gordoni Waidhofer <gwaidhofer@stec-inc.com>
+ * Initial Driver Design!
+ * Thomas Swann <tswann@stec-inc.com>
+ * Interrupt handling.
+ * Ramprasad Chinthekindi <rchinthekindi@stec-inc.com>
+ * biomode implementation.
+ * Akhil Bhansali <abhansali@stec-inc.com>
+ * Added support for DISCARD / FLUSH and FUA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/compiler.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/time.h>
+#include <linux/hdreg.h>
+#include <linux/dma-mapping.h>
+#include <linux/completion.h>
+#include <linux/scatterlist.h>
+#include <linux/version.h>
+#include <linux/err.h>
+#include <linux/scatterlist.h>
+#include <linux/aer.h>
+#include <linux/ctype.h>
+#include <linux/wait.h>
+#include <linux/uio.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/sg.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <asm-generic/unaligned.h>
+
+#include "skd_s1120.h"
+
+static int skd_dbg_level;
+static int skd_isr_comp_limit = 4;
+
+/* Link-speed codes; going by the names: 2.5, 5 and 8 GT/s, plus "unknown". */
+enum {
+	STEC_LINK_2_5GTS = 0,
+	STEC_LINK_5GTS = 1,
+	STEC_LINK_8GTS = 2,
+	STEC_LINK_UNKNOWN = 0xFF
+};
+
+/*
+ * Values the per-iteration "flush" counter in skd_request_fn() is compared
+ * against.  The counter is bumped once when a bio is taken from
+ * skdev->flush_list and once when REQ_FLUSH is set in the I/O flags:
+ *   SKD_FLUSH_ZERO_SIZE_FIRST (1) selects the zero-size flush CDB;
+ *   SKD_FLUSH_DATA_SECOND (2) is checked in bio mode to use the bio that
+ *   was already dequeued instead of popping skdev->bio_queue.
+ */
+enum {
+	SKD_FLUSH_INITIALIZER,
+	SKD_FLUSH_ZERO_SIZE_FIRST,
+	SKD_FLUSH_DATA_SECOND,
+};
+
+/*
+ * Debug print: emits through pr_err() only when (skdev)->dbg_level > 0.
+ * The message is prefixed with the device name, function name and line.
+ */
+#define DPRINTK(skdev, fmt, args ...) \
+	do { \
+		if (unlikely((skdev)->dbg_level > 0)) {	\
+			pr_err("%s:%s:%d " fmt, (skdev)->name,	\
+			       __func__, __LINE__, ## args); \
+		} \
+	} while (0)
+
+/*
+ * Soft assertion: when expr is false, log the expression text, file,
+ * function and line through pr_err().  Execution continues afterwards;
+ * this macro never halts or returns.
+ */
+#define SKD_ASSERT(expr) \
+	do { \
+		if (unlikely(!(expr))) { \
+			pr_err("Assertion failed! %s,%s,%s,line=%d\n",	\
+			       # expr, __FILE__, __func__, __LINE__); \
+		} \
+	} while (0)
+
+/*
+ * Verbose debug print: same output format as DPRINTK, but only emitted
+ * when (skdev)->dbg_level > 1.
+ */
+#define VPRINTK(skdev, fmt, args ...) \
+	do { \
+		if (unlikely((skdev)->dbg_level > 1)) {	\
+			pr_err("%s:%s:%d " fmt, (skdev)->name,	\
+			       __func__, __LINE__, ## args); \
+		} \
+	} while (0)
+
+
+#define DRV_NAME "skd"
+#define DRV_VERSION "2.2.1"
+#define DRV_BUILD_ID "0260"
+#define PFX DRV_NAME ": "
+#define DRV_BIN_VERSION 0x100
+#define DRV_VER_COMPL   "2.2.1." DRV_BUILD_ID
+
+MODULE_AUTHOR("bug-reports: support@stec-inc.com");
+MODULE_LICENSE("Dual BSD/GPL");
+
+MODULE_DESCRIPTION("STEC s1120 PCIe SSD block/BIO driver (b" DRV_BUILD_ID ")");
+MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID);
+
+#define PCI_VENDOR_ID_STEC      0x1B39
+#define PCI_DEVICE_ID_S1120     0x0001
+
+#define SKD_FUA_NV		(1 << 1)
+#define SKD_MINORS_PER_DEVICE   16
+
+#define SKD_MAX_QUEUE_DEPTH     200u
+
+#define SKD_PAUSE_TIMEOUT       (5 * 1000)
+
+#define SKD_N_FITMSG_BYTES      (512u)
+
+#define SKD_N_SPECIAL_CONTEXT   32u
+#define SKD_N_SPECIAL_FITMSG_BYTES      (128u)
+
+/* SG elements are 32 bytes, so we can make this 4096 and still be under the
+ * 128KB limit.  That allows 4096*4K = 16M xfer size
+ */
+#define SKD_N_SG_PER_REQ_DEFAULT 256u
+#define SKD_N_SG_PER_SPECIAL    256u
+
+#define SKD_N_COMPLETION_ENTRY  256u
+#define SKD_N_READ_CAP_BYTES    (8u)
+
+#define SKD_N_INTERNAL_BYTES    (512u)
+
+/* 6 bits of uniqifier, 0xFC00 */
+#define SKD_ID_INCR             (0x400)
+#define SKD_ID_TABLE_MASK       (3u << 8u)
+#define  SKD_ID_RW_REQUEST      (0u << 8u)
+#define  SKD_ID_INTERNAL        (1u << 8u)
+#define  SKD_ID_SPECIAL_REQUEST (2u << 8u)
+#define  SKD_ID_FIT_MSG         (3u << 8u)
+#define SKD_ID_SLOT_MASK        0x00FFu
+#define SKD_ID_SLOT_AND_TABLE_MASK 0x03FFu
+
+#define SKD_N_TIMEOUT_SLOT      4u
+#define SKD_TIMEOUT_SLOT_MASK   3u
+
+#define SKD_N_MAX_SECTORS 2048u
+
+#define SKD_MAX_RETRIES 2u
+
+#define SKD_TIMER_SECONDS(seconds) (seconds)
+#define SKD_TIMER_MINUTES(minutes) ((minutes) * (60))
+
+#define INQ_STD_NBYTES 36
+#define SKD_DISCARD_CDB_LENGTH	24
+
+/*
+ * Driver-level state, stored in skd_device.state.  skd_request_fn() only
+ * issues I/O while the state is SKD_DRVR_STATE_ONLINE; in every other
+ * state it hands off to skd_request_fn_not_online().
+ */
+enum skd_drvr_state {
+	SKD_DRVR_STATE_LOAD,
+	SKD_DRVR_STATE_IDLE,
+	SKD_DRVR_STATE_BUSY,
+	SKD_DRVR_STATE_STARTING,
+	SKD_DRVR_STATE_ONLINE,
+	SKD_DRVR_STATE_PAUSING,
+	SKD_DRVR_STATE_PAUSED,
+	SKD_DRVR_STATE_DRAINING_TIMEOUT,
+	SKD_DRVR_STATE_RESTARTING,
+	SKD_DRVR_STATE_RESUMING,
+	SKD_DRVR_STATE_STOPPING,
+	SKD_DRVR_STATE_FAULT,
+	SKD_DRVR_STATE_DISAPPEARED,
+	SKD_DRVR_STATE_PROTOCOL_MISMATCH,
+	SKD_DRVR_STATE_BUSY_ERASE,
+	SKD_DRVR_STATE_BUSY_SANITIZE,
+	SKD_DRVR_STATE_BUSY_IMMINENT,
+	SKD_DRVR_STATE_WAIT_BOOT,
+	SKD_DRVR_STATE_SYNCING,
+};
+
+#define SKD_WAIT_BOOT_TIMO      SKD_TIMER_SECONDS(90u)
+#define SKD_STARTING_TIMO       SKD_TIMER_SECONDS(8u)
+#define SKD_RESTARTING_TIMO     SKD_TIMER_MINUTES(4u)
+#define SKD_DRAINING_TIMO       SKD_TIMER_SECONDS(6u)
+#define SKD_BUSY_TIMO           SKD_TIMER_MINUTES(20u)
+#define SKD_STARTED_BUSY_TIMO   SKD_TIMER_SECONDS(60u)
+#define SKD_START_WAIT_SECONDS  90u
+
+/*
+ * State of a skd_request_context.  skd_request_fn() expects entries on the
+ * free list to be SKD_REQ_STATE_IDLE and marks a context
+ * SKD_REQ_STATE_BUSY once it has been encoded into a FIT message.
+ */
+enum skd_req_state {
+	SKD_REQ_STATE_IDLE,
+	SKD_REQ_STATE_SETUP,
+	SKD_REQ_STATE_BUSY,
+	SKD_REQ_STATE_COMPLETED,
+	SKD_REQ_STATE_TIMEOUT,
+	SKD_REQ_STATE_ABORTED,
+};
+
+/*
+ * State of a skd_fitmsg_context: IDLE while on the free list, BUSY from
+ * the moment skd_request_fn() takes it off the list to build a message.
+ */
+enum skd_fit_msg_state {
+	SKD_MSG_STATE_IDLE,
+	SKD_MSG_STATE_BUSY,
+};
+
+enum skd_check_status_action {
+	SKD_CHECK_STATUS_REPORT_GOOD,
+	SKD_CHECK_STATUS_REPORT_SMART_ALERT,
+	SKD_CHECK_STATUS_REQUEUE_REQUEST,
+	SKD_CHECK_STATUS_REPORT_ERROR,
+	SKD_CHECK_STATUS_BUSY_IMMINENT,
+};
+
+/*
+ * Bookkeeping for one FIT message buffer.  skd_request_fn() packs one or
+ * more SCSI requests into msg_buf and hands it to skd_send_fitmsg().
+ */
+struct skd_fitmsg_context {
+	enum skd_fit_msg_state state;
+
+	/* Link for the singly linked skdev->skmsg_free_list. */
+	struct skd_fitmsg_context *next;
+
+	/* Advanced by SKD_ID_INCR each time the context is taken or returned. */
+	u32 id;
+	u16 outstanding;
+
+	/* Bytes of msg_buf filled so far (header plus encoded requests). */
+	u32 length;
+	u32 offset;
+
+	u8 *msg_buf;
+	/* Presumably the DMA address of msg_buf -- TODO confirm at alloc site. */
+	dma_addr_t mb_dma_address;
+};
+
+/*
+ * Per-command context for a read/write/flush/discard request.  Contexts
+ * live on skdev->skreq_free_list until skd_request_fn() claims one.
+ */
+struct skd_request_context {
+	enum skd_req_state state;
+
+	/* Link for the singly linked skdev->skreq_free_list. */
+	struct skd_request_context *next;
+
+	/* Advanced by SKD_ID_INCR when issued; id + SKD_ID_INCR is the tag. */
+	u16 id;
+	/* id of the FIT message this request was packed into. */
+	u32 fitmsg_id;
+
+	/* Native request: req in request mode, bio in bio mode (skd_bio). */
+	struct request *req;
+	struct bio *bio;
+	/* jiffies at dequeue; used for bio-mode partition statistics. */
+	unsigned long start_time;
+	/* Set to 1 by skd_prep_zerosize_flush_cdb(). */
+	u8 flush_cmd;
+	/* 1 when a page was allocated for a DISCARD payload and must be freed. */
+	u8 discard_page;
+
+	/* Copy of skdev->timeout_stamp taken when the request was issued. */
+	u32 timeout_stamp;
+	/* One of the SKD_DATA_DIR_* values. */
+	u8 sg_data_dir;
+	struct scatterlist *sg;
+	/* Number of mapped scatterlist entries and their total byte count. */
+	u32 n_sg;
+	u32 sg_byte_count;
+
+	/* Device-format SG descriptors and the address handed to the device. */
+	struct fit_sg_descriptor *sksg_list;
+	dma_addr_t sksg_dma_address;
+
+	struct fit_completion_entry_v1 completion;
+
+	struct fit_comp_error_info err_info;
+
+};
+#define SKD_DATA_DIR_HOST_TO_CARD       1
+#define SKD_DATA_DIR_CARD_TO_HOST       2
+#define SKD_DATA_DIR_NONE		3	/* especially for DISCARD requests. */
+
+struct skd_special_context {
+	struct skd_request_context req;
+
+	u8 orphaned;
+
+	void *data_buf;
+	dma_addr_t db_dma_address;
+
+	u8 *msg_buf;
+	dma_addr_t mb_dma_address;
+};
+
+struct skd_sg_io {
+	fmode_t mode;
+	void __user *argp;
+
+	struct sg_io_hdr sg;
+
+	u8 cdb[16];
+
+	u32 dxfer_len;
+	u32 iovcnt;
+	struct sg_iovec *iov;
+	struct sg_iovec no_iov_iov;
+
+	struct skd_special_context *skspcl;
+};
+
+/*
+ * Interrupt delivery mode.  The numeric values match the skd_isr_type
+ * module parameter: 0 == legacy, 1 == MSI, 2 == MSI-X.
+ */
+typedef enum skd_irq_type {
+	SKD_IRQ_LEGACY,
+	SKD_IRQ_MSI,
+	SKD_IRQ_MSIX
+} skd_irq_type_t;
+
+#define SKD_MAX_BARS                    2
+
+/*
+ * Per-device driver state.  Only fields whose use is visible near this
+ * definition are annotated.
+ */
+struct skd_device {
+	/* Mapped BARs; the register accessors use mem_map[1]. */
+	volatile void __iomem *mem_map[SKD_MAX_BARS];
+	resource_size_t mem_phys[SKD_MAX_BARS];
+	u32 mem_size[SKD_MAX_BARS];
+
+	skd_irq_type_t irq_type;
+	u32 msix_count;
+	struct skd_msix_entry *msix_entries;
+
+	struct pci_dev *pdev;
+	int pcie_error_reporting_is_enabled;
+
+	/* Held by skd_make_request() around skd_request_fn(). */
+	spinlock_t lock;
+	struct gendisk *disk;
+	struct request_queue *queue;
+	struct device *class_dev;
+	int gendisk_on;
+	int sync_done;
+
+	atomic_t device_count;
+	u32 devno;
+	u32 major;
+	char name[32];
+	char isr_name[30];
+
+	/* skd_request_fn() issues I/O only in SKD_DRVR_STATE_ONLINE. */
+	enum skd_drvr_state state;
+	u32 drive_state;
+
+	/* Requests issued and not yet completed, and the limits on them. */
+	u32 in_flight;
+	u32 cur_max_queue_depth;
+	u32 queue_low_water_mark;
+	u32 dev_max_queue_depth;
+
+	u32 num_fitmsg_context;
+	u32 num_req_context;
+
+	/* Per-slot count of issued requests, indexed by timeout_stamp & mask. */
+	u32 timeout_slot[SKD_N_TIMEOUT_SLOT];
+	u32 timeout_stamp;
+	struct skd_fitmsg_context *skmsg_free_list;
+	struct skd_fitmsg_context *skmsg_table;
+
+	struct skd_request_context *skreq_free_list;
+	struct skd_request_context *skreq_table;
+
+	struct skd_special_context *skspcl_free_list;
+	struct skd_special_context *skspcl_table;
+
+	struct skd_special_context internal_skspcl;
+	u32 read_cap_blocksize;
+	u32 read_cap_last_lba;
+	int read_cap_is_valid;
+	int inquiry_is_valid;
+	u8 inq_serial_num[13];  /*12 chars plus null term */
+	u8 id_str[80];          /* holds a composite name (pci + sernum) */
+
+	u8 skcomp_cycle;
+	u32 skcomp_ix;
+	struct fit_completion_entry_v1 *skcomp_table;
+	struct fit_comp_error_info *skerr_table;
+	dma_addr_t cq_dma_address;
+
+	wait_queue_head_t waitq;
+
+	struct timer_list timer;
+	u32 timer_countdown;
+	u32 timer_substate;
+
+	int n_special;
+	int sgs_per_request;
+	u32 last_mtd;
+
+	u32 proto_ver;
+
+	/* Gates DPRINTK (> 0) and VPRINTK / register tracing (> 1). */
+	int dbg_level;
+	u32 connect_time_stamp;
+	int connect_retries;
+#define SKD_MAX_CONNECT_RETRIES 16
+	u32 drive_jiffies;
+
+	u32 timo_slot;
+
+
+	struct work_struct completion_worker;
+
+	/* Bio mode: bios queued by skd_make_request(), and the stopped flag. */
+	struct bio_list bio_queue;
+	int queue_stopped;
+
+	/* Bio mode: checked first by skd_request_fn() for pending flush data. */
+	struct list_head flush_list;
+};
+
+#define SKD_FLUSH_JOB   "skd-flush-jobs"
+struct kmem_cache *skd_flush_slab;
+
+/*
+ * These commands hold "nonzero size FLUSH bios",
+ * which are enqueued in skdev->flush_list during
+ * completion of "zero size FLUSH commands".
+ * It will be active in biomode.
+ */
+struct skd_flush_cmd {
+	void *cmd;
+	struct list_head flist;
+};
+
+#define SKD_WRITEL(DEV, VAL, OFF) skd_reg_write32(DEV, VAL, OFF)
+#define SKD_READL(DEV, OFF)      skd_reg_read32(DEV, OFF)
+#define SKD_WRITEQ(DEV, VAL, OFF) skd_reg_write64(DEV, VAL, OFF)
+
+/*
+ * Read the 32-bit device register at @offset within mem_map[1].
+ * When dbg_level is 2 or higher the read is bracketed by compiler
+ * barriers and the value is logged through VPRINTK.
+ */
+static inline u32 skd_reg_read32(struct skd_device *skdev, u32 offset)
+{
+	u32 reg;
+
+	/* Fast path: register tracing is off. */
+	if (likely(skdev->dbg_level < 2))
+		return readl(skdev->mem_map[1] + offset);
+
+	barrier();
+	reg = readl(skdev->mem_map[1] + offset);
+	barrier();
+	VPRINTK(skdev, "offset %x = %x\n", offset, reg);
+
+	return reg;
+}
+
+/*
+ * Write @val to the 32-bit device register at @offset within mem_map[1],
+ * then read the same register back and discard the result.  When
+ * dbg_level is 2 or higher an extra leading barrier is issued and the
+ * written value is logged through VPRINTK.
+ */
+static inline void skd_reg_write32(struct skd_device *skdev, u32 val,
+				   u32 offset)
+{
+	if (likely(skdev->dbg_level < 2)) {
+		writel(val, skdev->mem_map[1] + offset);
+		barrier();
+		/* Read-back; presumably to flush the posted write. */
+		readl(skdev->mem_map[1] + offset);
+		barrier();
+	} else {
+		barrier();
+		writel(val, skdev->mem_map[1] + offset);
+		barrier();
+		readl(skdev->mem_map[1] + offset);
+		barrier();
+		VPRINTK(skdev, "offset %x = %x\n", offset, val);
+	}
+}
+
+/*
+ * 64-bit counterpart of skd_reg_write32(): write @val at @offset within
+ * mem_map[1], read the register back and discard the result.  When
+ * dbg_level is 2 or higher the written value is also logged.
+ */
+static inline void skd_reg_write64(struct skd_device *skdev, u64 val,
+				   u32 offset)
+{
+	if (likely(skdev->dbg_level < 2)) {
+		writeq(val, skdev->mem_map[1] + offset);
+		barrier();
+		/* Read-back; presumably to flush the posted write. */
+		readq(skdev->mem_map[1] + offset);
+		barrier();
+	} else {
+		barrier();
+		writeq(val, skdev->mem_map[1] + offset);
+		barrier();
+		readq(skdev->mem_map[1] + offset);
+		barrier();
+		VPRINTK(skdev, "offset %x = %016llx\n", offset, val);
+	}
+}
+
+
+#define SKD_IRQ_DEFAULT SKD_IRQ_MSI
+static int skd_isr_type = SKD_IRQ_DEFAULT;
+
+module_param(skd_isr_type, int, 0444);
+MODULE_PARM_DESC(skd_isr_type, "Interrupt type capability."
+		 " (0==legacy, 1==MSI, 2==MSI-X, default==1)");
+
+#define SKD_MAX_REQ_PER_MSG_DEFAULT 1
+static int skd_max_req_per_msg = SKD_MAX_REQ_PER_MSG_DEFAULT;
+
+module_param(skd_max_req_per_msg, int, 0444);
+MODULE_PARM_DESC(skd_max_req_per_msg,
+		 "Maximum SCSI requests packed in a single message."
+		 " (1-14, default==1)");
+
+#define SKD_MAX_QUEUE_DEPTH_DEFAULT 64
+#define SKD_MAX_QUEUE_DEPTH_DEFAULT_STR "64"
+static int skd_max_queue_depth = SKD_MAX_QUEUE_DEPTH_DEFAULT;
+
+module_param(skd_max_queue_depth, int, 0444);
+MODULE_PARM_DESC(skd_max_queue_depth,
+		 "Maximum SCSI requests issued to s1120."
+		 " (1-200, default==" SKD_MAX_QUEUE_DEPTH_DEFAULT_STR ")");
+
+static int skd_sgs_per_request = SKD_N_SG_PER_REQ_DEFAULT;
+module_param(skd_sgs_per_request, int, 0444);
+MODULE_PARM_DESC(skd_sgs_per_request,
+		 "Maximum SG elements per block request."
+		 " (1-4096, default==256)");
+
+static int skd_max_pass_thru = SKD_N_SPECIAL_CONTEXT;
+module_param(skd_max_pass_thru, int, 0444);
+MODULE_PARM_DESC(skd_max_pass_thru,
+		 "Maximum SCSI pass-thru at a time." " (1-50, default==32)");
+
+module_param(skd_dbg_level, int, 0444);
+MODULE_PARM_DESC(skd_dbg_level, "s1120 debug level (0,1,2)");
+
+module_param(skd_isr_comp_limit, int, 0444);
+MODULE_PARM_DESC(skd_isr_comp_limit, "s1120 isr comp limit (0=none) default=4");
+
+static int skd_bio;
+module_param(skd_bio, int, 0444);
+MODULE_PARM_DESC(skd_bio,
+		 "Register as a bio device instead of block (0, 1) default=0");
+
+/* Major device number dynamically assigned. */
+static u32 skd_major;
+
+static struct skd_device *skd_construct(struct pci_dev *pdev);
+static void skd_destruct(struct skd_device *skdev);
+static const struct block_device_operations skd_blockdev_ops;
+static void skd_send_fitmsg(struct skd_device *skdev,
+			    struct skd_fitmsg_context *skmsg);
+static void skd_send_special_fitmsg(struct skd_device *skdev,
+				    struct skd_special_context *skspcl);
+static void skd_request_fn(struct request_queue *rq);
+static void skd_end_request(struct skd_device *skdev,
+			    struct skd_request_context *skreq, int error);
+static int skd_preop_sg_list(struct skd_device *skdev,
+			     struct skd_request_context *skreq);
+static void skd_postop_sg_list(struct skd_device *skdev,
+			       struct skd_request_context *skreq);
+
+static void skd_restart_device(struct skd_device *skdev);
+static int skd_quiesce_dev(struct skd_device *skdev);
+static int skd_unquiesce_dev(struct skd_device *skdev);
+static void skd_release_special(struct skd_device *skdev,
+				struct skd_special_context *skspcl);
+static void skd_disable_interrupts(struct skd_device *skdev);
+static void skd_isr_fwstate(struct skd_device *skdev);
+static void skd_recover_requests(struct skd_device *skdev, int requeue);
+static void skd_soft_reset(struct skd_device *skdev);
+
+static const char *skd_name(struct skd_device *skdev);
+const char *skd_drive_state_to_str(int state);
+const char *skd_skdev_state_to_str(enum skd_drvr_state state);
+static void skd_log_skdev(struct skd_device *skdev, const char *event);
+static void skd_log_skmsg(struct skd_device *skdev,
+			  struct skd_fitmsg_context *skmsg, const char *event);
+static void skd_log_skreq(struct skd_device *skdev,
+			  struct skd_request_context *skreq, const char *event);
+
+/* FLUSH FUA flag handling. */
+static int skd_flush_cmd_enqueue(struct skd_device *, void *);
+static void *skd_flush_cmd_dequeue(struct skd_device *);
+
+
+/*
+ *****************************************************************************
+ * READ/WRITE REQUESTS
+ *****************************************************************************
+ */
+/*
+ * Stop request submission: set the private flag in bio mode, otherwise
+ * ask the block layer to stop the queue.
+ */
+static void skd_stop_queue(struct skd_device *skdev)
+{
+	if (skd_bio) {
+		skdev->queue_stopped = 1;
+		return;
+	}
+
+	blk_stop_queue(skdev->queue);
+}
+
+/*
+ * Clear the "stopped" indication without kicking the queue: the private
+ * flag in bio mode, QUEUE_FLAG_STOPPED otherwise.
+ */
+static void skd_unstop_queue(struct skd_device *skdev)
+{
+	if (skd_bio) {
+		skdev->queue_stopped = 0;
+		return;
+	}
+
+	queue_flag_clear(QUEUE_FLAG_STOPPED, skdev->queue);
+}
+
+/*
+ * Restart request submission.  In request mode the block layer restarts
+ * the queue; in bio mode the flag is cleared and skd_request_fn() is
+ * invoked directly.
+ */
+static void skd_start_queue(struct skd_device *skdev)
+{
+	if (!skd_bio) {
+		blk_start_queue(skdev->queue);
+		return;
+	}
+
+	pr_err("(%s): Starting queue\n", skd_name(skdev));
+	skdev->queue_stopped = 0;
+	skd_request_fn(skdev->queue);
+}
+
+/* Return non-zero when submission is currently stopped (either mode). */
+static int skd_queue_stopped(struct skd_device *skdev)
+{
+	if (skd_bio)
+		return skdev->queue_stopped;
+
+	return blk_queue_stopped(skdev->queue);
+}
+
+/*
+ * Request mode: drain the block-layer queue, completing every pending
+ * request with -EIO.
+ */
+static void skd_fail_all_pending_blk(struct skd_device *skdev)
+{
+	struct request_queue *q = skdev->queue;
+	struct request *pending;
+
+	while ((pending = blk_peek_request(q)) != NULL) {
+		blk_start_request(pending);
+		__blk_end_request_all(pending, -EIO);
+	}
+}
+
+/*
+ * Bio mode: pop every bio off skdev->bio_queue and end it with -EIO.
+ */
+static void skd_fail_all_pending_bio(struct skd_device *skdev)
+{
+	struct bio *pending;
+
+	while ((pending = bio_list_pop(&skdev->bio_queue)) != NULL)
+		bio_endio(pending, -EIO);
+}
+
+/* Fail everything still queued, using the helper for the active mode. */
+static void skd_fail_all_pending(struct skd_device *skdev)
+{
+	if (skd_bio) {
+		skd_fail_all_pending_bio(skdev);
+		return;
+	}
+
+	skd_fail_all_pending_blk(skdev);
+}
+
+/*
+ * Bio-mode entry point: append @bio to the device's bio queue and run
+ * skd_request_fn(), all under skdev->lock with interrupts disabled.
+ */
+static void skd_make_request(struct request_queue *q, struct bio *bio)
+{
+	unsigned long irq_flags;
+	struct skd_device *skdev = q->queuedata;
+
+	spin_lock_irqsave(&skdev->lock, irq_flags);
+	bio_list_add(&skdev->bio_queue, bio);
+	skd_request_fn(skdev->queue);
+	spin_unlock_irqrestore(&skdev->lock, irq_flags);
+}
+
+/*
+ * Fill in a 10-byte read or write CDB.
+ *
+ * cdb[0] is 0x28 when @data_dir is READ and 0x2a otherwise; @lba goes into
+ * cdb[2..5] and the low 16 bits of @count into cdb[7..8], both most
+ * significant byte first.  The remaining bytes are zeroed.
+ */
+static void
+skd_prep_rw_cdb(struct skd_scsi_request *scsi_req,
+		int data_dir, unsigned lba,
+		unsigned count)
+{
+	scsi_req->cdb[0] = (data_dir == READ) ? 0x28 : 0x2a;
+	scsi_req->cdb[1] = 0;
+
+	/* Logical block address, big-endian. */
+	scsi_req->cdb[2] = (lba >> 24) & 0xff;
+	scsi_req->cdb[3] = (lba >> 16) & 0xff;
+	scsi_req->cdb[4] = (lba >> 8) & 0xff;
+	scsi_req->cdb[5] = lba & 0xff;
+
+	scsi_req->cdb[6] = 0;
+
+	/* Transfer length, big-endian. */
+	scsi_req->cdb[7] = (count >> 8) & 0xff;
+	scsi_req->cdb[8] = count & 0xff;
+
+	scsi_req->cdb[9] = 0;
+}
+
+/*
+ * Build the CDB for a zero-size flush: opcode 0x35 with bytes 1..9
+ * cleared, and mark @skreq as a flush command.
+ */
+static void
+skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req,
+			struct skd_request_context *skreq)
+{
+	int i;
+
+	skreq->flush_cmd = 1;
+
+	scsi_req->cdb[0] = 0x35;
+	for (i = 1; i <= 9; i++)
+		scsi_req->cdb[i] = 0;
+}
+
+/*
+ * Build an UNMAP command for a discard.
+ *
+ * The CDB gets the UNMAP opcode and a parameter length of
+ * SKD_DISCARD_CDB_LENGTH.  The payload is written into @page (which the
+ * caller allocates zeroed): 22 at bytes 0-1, 16 at bytes 2-3, @lba as
+ * 64 bits at byte 8 and @count as 32 bits at byte 16, all big-endian.
+ * The page is then attached to the request (request mode) or installed
+ * as the bio's single segment (bio mode).
+ */
+static void
+skd_prep_discard_cdb(struct skd_scsi_request *scsi_req,
+			struct skd_request_context *skreq,
+			struct page *page,
+			u32 lba, u32 count)
+{
+	char *buf = page_address(page);
+
+	scsi_req->cdb[0] = UNMAP;
+	scsi_req->cdb[8] = SKD_DISCARD_CDB_LENGTH;
+
+	put_unaligned_be16(6 + 16, &buf[0]);
+	put_unaligned_be16(16, &buf[2]);
+	put_unaligned_be64(lba, &buf[8]);
+	put_unaligned_be32(count, &buf[16]);
+
+	if (skd_bio) {
+		struct bio *bio = skreq->bio;
+
+		bio->bi_io_vec->bv_page = page;
+		bio->bi_io_vec->bv_offset = 0;
+		bio->bi_io_vec->bv_len = SKD_DISCARD_CDB_LENGTH;
+
+		bio->bi_vcnt = 1;
+		bio->bi_phys_segments = 1;
+	} else {
+		struct request *req = skreq->req;
+
+		blk_add_request_payload(req, page, SKD_DISCARD_CDB_LENGTH);
+		/* skd_end_request_blk() frees the page through req->buffer. */
+		req->buffer = buf;
+	}
+}
+
+static void skd_request_fn_not_online(struct request_queue *q);
+
+/*
+ * Main submission routine for both request mode and bio mode.
+ *
+ * Pulls native requests (from the block-layer queue, or from
+ * skdev->flush_list / skdev->bio_queue in bio mode), encodes each one as a
+ * SCSI request inside a FIT message and sends the message once it is full
+ * or skd_max_req_per_msg requests have been packed.  A partially filled
+ * message is sent when the loop ends; an empty one goes back on the free
+ * list.  If work remains but a resource ran out, the queue is stopped.
+ *
+ * When the driver is not SKD_DRVR_STATE_ONLINE the work is delegated to
+ * skd_request_fn_not_online().
+ *
+ * skd_make_request() calls this with skdev->lock held.
+ */
+static void skd_request_fn(struct request_queue *q)
+{
+	struct skd_device *skdev = q->queuedata;
+	struct skd_fitmsg_context *skmsg = NULL;
+	struct fit_msg_hdr *fmh = NULL;
+	struct skd_request_context *skreq;
+	struct request *req = NULL;
+	struct bio *bio = NULL;
+	struct skd_scsi_request *scsi_req;
+	struct page *page;
+	unsigned long io_flags;
+	int error;
+	u32 lba;
+	u32 count;
+	int data_dir;
+	u64 be_dmaa;
+	u64 cmdctxt;
+	u32 timo_slot;
+	void *cmd_ptr;
+	int flush, fua;
+
+	if (skdev->state != SKD_DRVR_STATE_ONLINE) {
+		skd_request_fn_not_online(q);
+		return;
+	}
+
+	if (skd_queue_stopped(skdev)) {
+		if (skdev->skmsg_free_list == NULL ||
+		    skdev->skreq_free_list == NULL ||
+		    skdev->in_flight >= skdev->queue_low_water_mark)
+			/* There is still some kind of shortage */
+			return;
+
+		skd_unstop_queue(skdev);
+	}
+
+	/*
+	 * Stop conditions:
+	 *  - There are no more native requests
+	 *  - There are already the maximum number of requests in progress
+	 *  - There are no more skd_request_context entries
+	 *  - There are no more FIT msg buffers
+	 */
+	for (;; ) {
+
+		flush = fua = 0;
+
+		if (!skd_bio) {
+			req = blk_peek_request(q);
+
+			/* Are there any native requests to start? */
+			if (req == NULL)
+				break;
+
+			lba = (u32)blk_rq_pos(req);
+			count = blk_rq_sectors(req);
+			data_dir = rq_data_dir(req);
+			io_flags = req->cmd_flags;
+
+			if (io_flags & REQ_FLUSH)
+				flush++;
+
+			if (io_flags & REQ_FUA)
+				fua++;
+
+			VPRINTK(skdev,
+				"new req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n",
+				req, lba, lba, count, count, data_dir);
+		} else {
+			if (!list_empty(&skdev->flush_list)) {
+				/* Process data part of FLUSH request. */
+				bio = (struct bio *)skd_flush_cmd_dequeue(skdev);
+				flush++;
+				VPRINTK(skdev, "processing FLUSH request with data.\n");
+			} else {
+				/* peek at our bio queue */
+				bio = bio_list_peek(&skdev->bio_queue);
+			}
+
+			/* Are there any native requests to start? */
+			if (bio == NULL)
+				break;
+
+			lba = (u32)bio->bi_sector;
+			count = bio_sectors(bio);
+			data_dir = bio_data_dir(bio);
+			io_flags = bio->bi_rw;
+
+			VPRINTK(skdev,
+				"new bio=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n",
+				bio, lba, lba, count, count, data_dir);
+
+			if (io_flags & REQ_FLUSH)
+				flush++;
+
+			if (io_flags & REQ_FUA)
+				fua++;
+		}
+
+		/*
+		 * At this point we know there is a request (from our bio
+		 * queue or the request queue, depending on the mode the
+		 * driver runs in).  Now check for resources.
+		 */
+
+		/* Are too many requests already in progress? */
+		if (skdev->in_flight >= skdev->cur_max_queue_depth) {
+			VPRINTK(skdev, "qdepth %d, limit %d\n",
+				skdev->in_flight, skdev->cur_max_queue_depth);
+			break;
+		}
+
+		/* Is a skd_request_context available? */
+		skreq = skdev->skreq_free_list;
+		if (skreq == NULL) {
+			VPRINTK(skdev, "Out of req=%p\n", q);
+			break;
+		}
+		SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
+		SKD_ASSERT((skreq->id & SKD_ID_INCR) == 0);
+
+		/* Now we check to see if we can get a fit msg */
+		if (skmsg == NULL) {
+			if (skdev->skmsg_free_list == NULL) {
+				VPRINTK(skdev, "Out of msg\n");
+				break;
+			}
+		}
+
+		skreq->flush_cmd = 0;
+		skreq->n_sg = 0;
+		skreq->sg_byte_count = 0;
+		skreq->discard_page = 0;
+
+		/*
+		 * OK to now dequeue request from either bio or q.
+		 *
+		 * At this point we are committed to either start or reject
+		 * the native request. Note that skd_request_context is
+		 * available but is still at the head of the free list.
+		 */
+		if (!skd_bio) {
+			blk_start_request(req);
+			skreq->req = req;
+			skreq->fitmsg_id = 0;
+		} else {
+			if (unlikely(flush == SKD_FLUSH_DATA_SECOND)) {
+				/* Already dequeued from flush_list above. */
+				skreq->bio = bio;
+			} else {
+				skreq->bio = bio_list_pop(&skdev->bio_queue);
+				SKD_ASSERT(skreq->bio == bio);
+				skreq->start_time = jiffies;
+				part_inc_in_flight(&skdev->disk->part0,
+						   bio_data_dir(bio));
+			}
+
+			skreq->fitmsg_id = 0;
+		}
+
+		/* Either a FIT msg is in progress or we have to start one. */
+		if (skmsg == NULL) {
+			/*
+			 * Are there any FIT msg buffers available?
+			 * The free list was already checked before the
+			 * native request was dequeued.
+			 */
+			skmsg = skdev->skmsg_free_list;
+			if (skmsg == NULL) {
+				VPRINTK(skdev, "Out of msg skdev=%p\n", skdev);
+				break;
+			}
+			SKD_ASSERT(skmsg->state == SKD_MSG_STATE_IDLE);
+			SKD_ASSERT((skmsg->id & SKD_ID_INCR) == 0);
+
+			skdev->skmsg_free_list = skmsg->next;
+
+			skmsg->state = SKD_MSG_STATE_BUSY;
+			skmsg->id += SKD_ID_INCR;
+
+			/* Initialize the FIT msg header */
+			fmh = (struct fit_msg_hdr *)skmsg->msg_buf;
+			memset(fmh, 0, sizeof(*fmh));
+			fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
+			skmsg->length = sizeof(*fmh);
+		}
+
+		skreq->fitmsg_id = skmsg->id;
+
+		/*
+		 * Note that a FIT msg may have just been started
+		 * but contains no SoFIT requests yet.
+		 */
+
+		/*
+		 * Transcode the request, checking as we go. The outcome of
+		 * the transcoding is represented by the error variable.
+		 */
+		cmd_ptr = &skmsg->msg_buf[skmsg->length];
+		memset(cmd_ptr, 0, 32);
+
+		be_dmaa = cpu_to_be64((u64)skreq->sksg_dma_address);
+		cmdctxt = skreq->id + SKD_ID_INCR;
+
+		scsi_req = cmd_ptr;
+		scsi_req->hdr.tag = cmdctxt;
+		scsi_req->hdr.sg_list_dma_address = be_dmaa;
+
+		if (data_dir == READ)
+			skreq->sg_data_dir = SKD_DATA_DIR_CARD_TO_HOST;
+		else
+			skreq->sg_data_dir = SKD_DATA_DIR_HOST_TO_CARD;
+
+		if (io_flags & REQ_DISCARD) {
+			page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+			if (!page) {
+				pr_err("request_fn:Page allocation failed.\n");
+				skd_end_request(skdev, skreq, -ENOMEM);
+				break;
+			}
+			skreq->discard_page = 1;
+			skd_prep_discard_cdb(scsi_req, skreq, page, lba, count);
+
+		} else if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) {
+			skd_prep_zerosize_flush_cdb(scsi_req, skreq);
+			SKD_ASSERT(skreq->flush_cmd == 1);
+
+		} else {
+			skd_prep_rw_cdb(scsi_req, data_dir, lba, count);
+		}
+
+		if (fua)
+			scsi_req->cdb[1] |= SKD_FUA_NV;
+
+		/* Nothing to map: no bio attached, or a zero-size flush. */
+		if ((!skd_bio && !req->bio) ||
+			(skd_bio && flush == SKD_FLUSH_ZERO_SIZE_FIRST))
+			goto skip_sg;
+
+		error = skd_preop_sg_list(skdev, skreq);
+
+		if (error != 0) {
+			/*
+			 * Complete the native request with error.
+			 * Note that the request context is still at the
+			 * head of the free list, and that the SoFIT request
+			 * was encoded into the FIT msg buffer but the FIT
+			 * msg length has not been updated. In short, the
+			 * only resource that has been allocated but might
+			 * not be used is that the FIT msg could be empty.
+			 */
+			DPRINTK(skdev, "error Out\n");
+			skd_end_request(skdev, skreq, error);
+			continue;
+		}
+
+skip_sg:
+		scsi_req->hdr.sg_list_len_bytes =
+			cpu_to_be32(skreq->sg_byte_count);
+
+		/* Complete resource allocations. */
+		skdev->skreq_free_list = skreq->next;
+		skreq->state = SKD_REQ_STATE_BUSY;
+		skreq->id += SKD_ID_INCR;
+
+		skmsg->length += sizeof(struct skd_scsi_request);
+		fmh->num_protocol_cmds_coalesced++;
+
+		/*
+		 * Update the active request counts.
+		 * Capture the timeout timestamp.
+		 */
+		skreq->timeout_stamp = skdev->timeout_stamp;
+		timo_slot = skreq->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
+		skdev->timeout_slot[timo_slot]++;
+		skdev->in_flight++;
+		VPRINTK(skdev, "req=0x%x busy=%d\n",
+			skreq->id, skdev->in_flight);
+
+		/*
+		 * If the FIT msg buffer is full send it.
+		 */
+		if (skmsg->length >= SKD_N_FITMSG_BYTES ||
+		    fmh->num_protocol_cmds_coalesced >= skd_max_req_per_msg) {
+			skd_send_fitmsg(skdev, skmsg);
+			skmsg = NULL;
+			fmh = NULL;
+		}
+	}
+
+	/*
+	 * Is a FIT msg in progress? If it is empty put the buffer back
+	 * on the free list. If it is non-empty send what we got.
+	 * This minimizes latency when there are fewer requests than
+	 * what fits in a FIT msg.
+	 */
+	if (skmsg != NULL) {
+		/* Bigger than just a FIT msg header? */
+		if (skmsg->length > sizeof(struct fit_msg_hdr)) {
+			VPRINTK(skdev, "sending msg=%p, len %d\n",
+				skmsg, skmsg->length);
+			skd_send_fitmsg(skdev, skmsg);
+		} else {
+			/*
+			 * The FIT msg is empty. It means we got started
+			 * on the msg, but the requests were rejected.
+			 */
+			skmsg->state = SKD_MSG_STATE_IDLE;
+			skmsg->id += SKD_ID_INCR;
+			skmsg->next = skdev->skmsg_free_list;
+			skdev->skmsg_free_list = skmsg;
+		}
+		skmsg = NULL;
+		fmh = NULL;
+	}
+
+	/*
+	 * If req is non-NULL it means there is something to do but
+	 * we are out of a resource.
+	 */
+	if (((!skd_bio) && req) ||
+	    ((skd_bio) && bio_list_peek(&skdev->bio_queue)))
+		skd_stop_queue(skdev);
+}
+
+/*
+ * Request mode: complete the block-layer request attached to @skreq.
+ *
+ * If the request was a DISCARD for which a payload page was allocated
+ * (skreq->discard_page == 1), the page is freed through req->buffer.
+ * A non-zero @error is logged with the command direction, start sector
+ * and sector count before the whole request is ended with @error.
+ */
+static void skd_end_request_blk(struct skd_device *skdev,
+				struct skd_request_context *skreq, int error)
+{
+	struct request *req = skreq->req;
+	unsigned int io_flags = req->cmd_flags;
+
+	if ((io_flags & REQ_DISCARD) &&
+		(skreq->discard_page == 1)) {
+		VPRINTK(skdev, "skd_end_request_blk, free the page!\n");
+		free_page((unsigned long)req->buffer);
+		req->buffer = NULL;
+	}
+
+	if (unlikely(error)) {
+		const char *cmd = (rq_data_dir(req) == READ) ? "read" : "write";
+		u32 lba = (u32)blk_rq_pos(req);
+		u32 count = blk_rq_sectors(req);
+
+		pr_err("(%s): Error cmd=%s sect=%u count=%u id=0x%x\n",
+		       skd_name(skdev), cmd, lba, count, skreq->id);
+	} else {
+		VPRINTK(skdev, "id=0x%x error=%d\n", skreq->id, error);
+	}
+
+	__blk_end_request_all(req, error);
+}
+
+/*
+ * Request mode: build the device scatter/gather list for @skreq.
+ *
+ * Maps the request into skreq->sg, DMA-maps that list, then copies each
+ * mapped segment's length and bus address into skreq->sksg_list.  The
+ * last descriptor is marked FIT_SGD_CONTROL_LAST and its next pointer is
+ * cleared; skd_postop_sg_list_blk() restores that pointer afterwards.
+ * skreq->n_sg and skreq->sg_byte_count are updated.
+ *
+ * Returns 0 on success, or -EINVAL when either mapping step yields no
+ * segments.
+ */
+static int skd_preop_sg_list_blk(struct skd_device *skdev,
+				 struct skd_request_context *skreq)
+{
+	struct request *req = skreq->req;
+	int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
+	int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
+	struct scatterlist *sg = &skreq->sg[0];
+	int n_sg;
+	int i;
+
+	skreq->sg_byte_count = 0;
+
+	n_sg = blk_rq_map_sg(skdev->queue, req, sg);
+	if (n_sg <= 0)
+		return -EINVAL;
+
+	/*
+	 * Map scatterlist to PCI bus addresses.
+	 * Note PCI might change the number of entries.
+	 */
+	n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
+	if (n_sg <= 0)
+		return -EINVAL;
+
+	/* Logs only; does not stop an oversized list from being written. */
+	SKD_ASSERT(n_sg <= skdev->sgs_per_request);
+
+	skreq->n_sg = n_sg;
+
+	for (i = 0; i < n_sg; i++) {
+		struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
+		u32 cnt = sg_dma_len(&sg[i]);
+		uint64_t dma_addr = sg_dma_address(&sg[i]);
+
+		sgd->control = FIT_SGD_CONTROL_NOT_LAST;
+		sgd->byte_count = cnt;
+		skreq->sg_byte_count += cnt;
+		sgd->host_side_addr = dma_addr;
+		sgd->dev_side_addr = 0;
+	}
+
+	skreq->sksg_list[n_sg - 1].next_desc_ptr = 0LL;
+	skreq->sksg_list[n_sg - 1].control = FIT_SGD_CONTROL_LAST;
+
+	if (unlikely(skdev->dbg_level > 1)) {
+		/* dma_addr_t may be 32 or 64 bits wide; widen it for %llx. */
+		VPRINTK(skdev, "skreq=%x sksg_list=%p sksg_dma=%llx\n",
+			skreq->id, skreq->sksg_list,
+			(unsigned long long)skreq->sksg_dma_address);
+		for (i = 0; i < n_sg; i++) {
+			struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
+			VPRINTK(skdev, "  sg[%d] count=%u ctrl=0x%x "
+				"addr=0x%llx next=0x%llx\n",
+				i, sgd->byte_count, sgd->control,
+				sgd->host_side_addr, sgd->next_desc_ptr);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Request mode: undo skd_preop_sg_list_blk() after the I/O.
+ *
+ * Points the last used descriptor's next_desc_ptr back at the descriptor
+ * that follows it, so the chain does not have to be rebuilt for every
+ * request, then unmaps the scatterlist.
+ *
+ * Indexes sksg_list[n_sg - 1], so skreq->n_sg must be non-zero here.
+ */
+static void skd_postop_sg_list_blk(struct skd_device *skdev,
+				   struct skd_request_context *skreq)
+{
+	struct fit_sg_descriptor *last = &skreq->sksg_list[skreq->n_sg - 1];
+	int pci_dir;
+
+	if (skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD)
+		pci_dir = PCI_DMA_TODEVICE;
+	else
+		pci_dir = PCI_DMA_FROMDEVICE;
+
+	last->next_desc_ptr = skreq->sksg_dma_address +
+		((skreq->n_sg) * sizeof(struct fit_sg_descriptor));
+
+	pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, pci_dir);
+}
+
+/*
+ * Bio mode: complete the bio attached to @skreq.
+ *
+ * Frees the DISCARD payload page when one was allocated, logs failures,
+ * updates the part0 statistics (I/O and sector counts only on success,
+ * ticks and in-flight always) and finally ends the bio with @error.
+ */
+static void skd_end_request_bio(struct skd_device *skdev,
+				struct skd_request_context *skreq, int error)
+{
+	struct bio *bio = skreq->bio;
+	int rw = bio_data_dir(bio);
+	unsigned long io_flags = bio->bi_rw;
+	int cpu;
+
+	if ((io_flags & REQ_DISCARD) && skreq->discard_page == 1) {
+		VPRINTK(skdev, "biomode: skd_end_request: freeing DISCARD page.\n");
+		free_page((unsigned long)page_address(bio->bi_io_vec->bv_page));
+	}
+
+	if (unlikely(error)) {
+		u32 lba = (u32)bio->bi_sector;
+		u32 count = bio_sectors(bio);
+		char *cmd = (rw == WRITE) ? "write" : "read";
+
+		pr_err("(%s): Error cmd=%s sect=%u count=%u id=0x%x\n",
+		       skd_name(skdev), cmd, lba, count, skreq->id);
+	}
+
+	/* Partition accounting for the whole-disk entry. */
+	cpu = part_stat_lock();
+	if (likely(!error)) {
+		part_stat_inc(cpu, &skdev->disk->part0, ios[rw]);
+		part_stat_add(cpu, &skdev->disk->part0, sectors[rw],
+			      bio_sectors(bio));
+	}
+	part_stat_add(cpu, &skdev->disk->part0, ticks[rw],
+		      jiffies - skreq->start_time);
+	part_dec_in_flight(&skdev->disk->part0, rw);
+	part_stat_unlock();
+
+	VPRINTK(skdev, "id=0x%x error=%d\n", skreq->id, error);
+
+	bio_endio(bio, error);
+}
+
+static int skd_preop_sg_list_bio(struct skd_device *skdev,
+				 struct skd_request_context *skreq)
+{
author	Akhil Bhansali <abhansali@stec-inc.com>	2013-10-15 14:19:07 -0600
committer	Jens Axboe <axboe@kernel.dk>	2013-11-08 09:10:28 -0700
commit	e67f86b31ae5be8a88bec27b5ecb18dc2ffc9c56 (patch)
tree	8e0cf5c5dd6a266edbce015ffc3e23b7c3e8cf37 /drivers/block/skd_main.c
parent	0317cd6de852a70e0374e7eb40a013072274386f (diff)