summaryrefslogtreecommitdiffstats
path: root/drivers/block/skd_main.c
diff options
context:
space:
mode:
authorAkhil Bhansali <abhansali@stec-inc.com>2013-10-15 14:19:07 -0600
committerJens Axboe <axboe@kernel.dk>2013-11-08 09:10:28 -0700
commite67f86b31ae5be8a88bec27b5ecb18dc2ffc9c56 (patch)
tree8e0cf5c5dd6a266edbce015ffc3e23b7c3e8cf37 /drivers/block/skd_main.c
parent0317cd6de852a70e0374e7eb40a013072274386f (diff)
Add support for sTec's pci-e flash card Kronos
Signed-off-by: Akhil Bhansali <abhansali@stec-inc.com> Signed-off-by: Ramprasad Chinthekindi <rchinthekindi@stec-inc.com> Reviewed-by: Jeff Moyer <jmoyer@redhat.com> Folded patch, contributions to clean up this driver from: Jens Axboe Dan Carpenter Andrew Morton Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/block/skd_main.c')
-rw-r--r--drivers/block/skd_main.c5817
1 files changed, 5817 insertions, 0 deletions
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
new file mode 100644
index 000000000000..3110f68ecedd
--- /dev/null
+++ b/drivers/block/skd_main.c
@@ -0,0 +1,5817 @@
+/* Copyright 2012 STEC, Inc.
+ *
+ * This file is licensed under the terms of the 3-clause
+ * BSD License (http://opensource.org/licenses/BSD-3-Clause)
+ * or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
+ * at your option. Both licenses are also available in the LICENSE file
+ * distributed with this project. This file may not be copied, modified,
+ * or distributed except in accordance with those terms.
+ * Gordoni Waidhofer <gwaidhofer@stec-inc.com>
+ * Initial Driver Design!
+ * Thomas Swann <tswann@stec-inc.com>
+ * Interrupt handling.
+ * Ramprasad Chinthekindi <rchinthekindi@stec-inc.com>
+ * biomode implementation.
+ * Akhil Bhansali <abhansali@stec-inc.com>
+ * Added support for DISCARD / FLUSH and FUA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/compiler.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/time.h>
+#include <linux/hdreg.h>
+#include <linux/dma-mapping.h>
+#include <linux/completion.h>
+#include <linux/scatterlist.h>
+#include <linux/version.h>
+#include <linux/err.h>
+#include <linux/scatterlist.h>
+#include <linux/aer.h>
+#include <linux/ctype.h>
+#include <linux/wait.h>
+#include <linux/uio.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/sg.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <asm-generic/unaligned.h>
+
+#include "skd_s1120.h"
+
+static int skd_dbg_level;
+static int skd_isr_comp_limit = 4;
+
+enum {
+ STEC_LINK_2_5GTS = 0,
+ STEC_LINK_5GTS = 1,
+ STEC_LINK_8GTS = 2,
+ STEC_LINK_UNKNOWN = 0xFF
+};
+
+enum {
+ SKD_FLUSH_INITIALIZER,
+ SKD_FLUSH_ZERO_SIZE_FIRST,
+ SKD_FLUSH_DATA_SECOND,
+};
+
+#define DPRINTK(skdev, fmt, args ...) \
+ do { \
+ if (unlikely((skdev)->dbg_level > 0)) { \
+ pr_err("%s:%s:%d " fmt, (skdev)->name, \
+ __func__, __LINE__, ## args); \
+ } \
+ } while (0)
+
+#define SKD_ASSERT(expr) \
+ do { \
+ if (unlikely(!(expr))) { \
+ pr_err("Assertion failed! %s,%s,%s,line=%d\n", \
+ # expr, __FILE__, __func__, __LINE__); \
+ } \
+ } while (0)
+
+#define VPRINTK(skdev, fmt, args ...) \
+ do { \
+ if (unlikely((skdev)->dbg_level > 1)) { \
+ pr_err("%s:%s:%d " fmt, (skdev)->name, \
+ __func__, __LINE__, ## args); \
+ } \
+ } while (0)
+
+
+#define DRV_NAME "skd"
+#define DRV_VERSION "2.2.1"
+#define DRV_BUILD_ID "0260"
+#define PFX DRV_NAME ": "
+#define DRV_BIN_VERSION 0x100
+#define DRV_VER_COMPL "2.2.1." DRV_BUILD_ID
+
+MODULE_AUTHOR("bug-reports: support@stec-inc.com");
+MODULE_LICENSE("Dual BSD/GPL");
+
+MODULE_DESCRIPTION("STEC s1120 PCIe SSD block/BIO driver (b" DRV_BUILD_ID ")");
+MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID);
+
+#define PCI_VENDOR_ID_STEC 0x1B39
+#define PCI_DEVICE_ID_S1120 0x0001
+
+#define SKD_FUA_NV (1 << 1)
+#define SKD_MINORS_PER_DEVICE 16
+
+#define SKD_MAX_QUEUE_DEPTH 200u
+
+#define SKD_PAUSE_TIMEOUT (5 * 1000)
+
+#define SKD_N_FITMSG_BYTES (512u)
+
+#define SKD_N_SPECIAL_CONTEXT 32u
+#define SKD_N_SPECIAL_FITMSG_BYTES (128u)
+
+/* SG elements are 32 bytes, so we can make this 4096 and still be under the
+ * 128KB limit. That allows 4096*4K = 16M xfer size
+ */
+#define SKD_N_SG_PER_REQ_DEFAULT 256u
+#define SKD_N_SG_PER_SPECIAL 256u
+
+#define SKD_N_COMPLETION_ENTRY 256u
+#define SKD_N_READ_CAP_BYTES (8u)
+
+#define SKD_N_INTERNAL_BYTES (512u)
+
+/* 5 bits of uniqifier, 0xF800 */
+#define SKD_ID_INCR (0x400)
+#define SKD_ID_TABLE_MASK (3u << 8u)
+#define SKD_ID_RW_REQUEST (0u << 8u)
+#define SKD_ID_INTERNAL (1u << 8u)
+#define SKD_ID_SPECIAL_REQUEST (2u << 8u)
+#define SKD_ID_FIT_MSG (3u << 8u)
+#define SKD_ID_SLOT_MASK 0x00FFu
+#define SKD_ID_SLOT_AND_TABLE_MASK 0x03FFu
+
+#define SKD_N_TIMEOUT_SLOT 4u
+#define SKD_TIMEOUT_SLOT_MASK 3u
+
+#define SKD_N_MAX_SECTORS 2048u
+
+#define SKD_MAX_RETRIES 2u
+
+#define SKD_TIMER_SECONDS(seconds) (seconds)
+#define SKD_TIMER_MINUTES(minutes) ((minutes) * (60))
+
+#define INQ_STD_NBYTES 36
+#define SKD_DISCARD_CDB_LENGTH 24
+
+enum skd_drvr_state {
+ SKD_DRVR_STATE_LOAD,
+ SKD_DRVR_STATE_IDLE,
+ SKD_DRVR_STATE_BUSY,
+ SKD_DRVR_STATE_STARTING,
+ SKD_DRVR_STATE_ONLINE,
+ SKD_DRVR_STATE_PAUSING,
+ SKD_DRVR_STATE_PAUSED,
+ SKD_DRVR_STATE_DRAINING_TIMEOUT,
+ SKD_DRVR_STATE_RESTARTING,
+ SKD_DRVR_STATE_RESUMING,
+ SKD_DRVR_STATE_STOPPING,
+ SKD_DRVR_STATE_FAULT,
+ SKD_DRVR_STATE_DISAPPEARED,
+ SKD_DRVR_STATE_PROTOCOL_MISMATCH,
+ SKD_DRVR_STATE_BUSY_ERASE,
+ SKD_DRVR_STATE_BUSY_SANITIZE,
+ SKD_DRVR_STATE_BUSY_IMMINENT,
+ SKD_DRVR_STATE_WAIT_BOOT,
+ SKD_DRVR_STATE_SYNCING,
+};
+
+#define SKD_WAIT_BOOT_TIMO SKD_TIMER_SECONDS(90u)
+#define SKD_STARTING_TIMO SKD_TIMER_SECONDS(8u)
+#define SKD_RESTARTING_TIMO SKD_TIMER_MINUTES(4u)
+#define SKD_DRAINING_TIMO SKD_TIMER_SECONDS(6u)
+#define SKD_BUSY_TIMO SKD_TIMER_MINUTES(20u)
+#define SKD_STARTED_BUSY_TIMO SKD_TIMER_SECONDS(60u)
+#define SKD_START_WAIT_SECONDS 90u
+
+enum skd_req_state {
+ SKD_REQ_STATE_IDLE,
+ SKD_REQ_STATE_SETUP,
+ SKD_REQ_STATE_BUSY,
+ SKD_REQ_STATE_COMPLETED,
+ SKD_REQ_STATE_TIMEOUT,
+ SKD_REQ_STATE_ABORTED,
+};
+
+enum skd_fit_msg_state {
+ SKD_MSG_STATE_IDLE,
+ SKD_MSG_STATE_BUSY,
+};
+
+enum skd_check_status_action {
+ SKD_CHECK_STATUS_REPORT_GOOD,
+ SKD_CHECK_STATUS_REPORT_SMART_ALERT,
+ SKD_CHECK_STATUS_REQUEUE_REQUEST,
+ SKD_CHECK_STATUS_REPORT_ERROR,
+ SKD_CHECK_STATUS_BUSY_IMMINENT,
+};
+
+struct skd_fitmsg_context {
+ enum skd_fit_msg_state state;
+
+ struct skd_fitmsg_context *next;
+
+ u32 id;
+ u16 outstanding;
+
+ u32 length;
+ u32 offset;
+
+ u8 *msg_buf;
+ dma_addr_t mb_dma_address;
+};
+
+struct skd_request_context {
+ enum skd_req_state state;
+
+ struct skd_request_context *next;
+
+ u16 id;
+ u32 fitmsg_id;
+
+ struct request *req;
+ struct bio *bio;
+ unsigned long start_time;
+ u8 flush_cmd;
+ u8 discard_page;
+
+ u32 timeout_stamp;
+ u8 sg_data_dir;
+ struct scatterlist *sg;
+ u32 n_sg;
+ u32 sg_byte_count;
+
+ struct fit_sg_descriptor *sksg_list;
+ dma_addr_t sksg_dma_address;
+
+ struct fit_completion_entry_v1 completion;
+
+ struct fit_comp_error_info err_info;
+
+};
+#define SKD_DATA_DIR_HOST_TO_CARD 1
+#define SKD_DATA_DIR_CARD_TO_HOST 2
+#define SKD_DATA_DIR_NONE 3 /* especially for DISCARD requests. */
+
+struct skd_special_context {
+ struct skd_request_context req;
+
+ u8 orphaned;
+
+ void *data_buf;
+ dma_addr_t db_dma_address;
+
+ u8 *msg_buf;
+ dma_addr_t mb_dma_address;
+};
+
+struct skd_sg_io {
+ fmode_t mode;
+ void __user *argp;
+
+ struct sg_io_hdr sg;
+
+ u8 cdb[16];
+
+ u32 dxfer_len;
+ u32 iovcnt;
+ struct sg_iovec *iov;
+ struct sg_iovec no_iov_iov;
+
+ struct skd_special_context *skspcl;
+};
+
+typedef enum skd_irq_type {
+ SKD_IRQ_LEGACY,
+ SKD_IRQ_MSI,
+ SKD_IRQ_MSIX
+} skd_irq_type_t;
+
+#define SKD_MAX_BARS 2
+
+struct skd_device {
+ volatile void __iomem *mem_map[SKD_MAX_BARS];
+ resource_size_t mem_phys[SKD_MAX_BARS];
+ u32 mem_size[SKD_MAX_BARS];
+
+ skd_irq_type_t irq_type;
+ u32 msix_count;
+ struct skd_msix_entry *msix_entries;
+
+ struct pci_dev *pdev;
+ int pcie_error_reporting_is_enabled;
+
+ spinlock_t lock;
+ struct gendisk *disk;
+ struct request_queue *queue;
+ struct device *class_dev;
+ int gendisk_on;
+ int sync_done;
+
+ atomic_t device_count;
+ u32 devno;
+ u32 major;
+ char name[32];
+ char isr_name[30];
+
+ enum skd_drvr_state state;
+ u32 drive_state;
+
+ u32 in_flight;
+ u32 cur_max_queue_depth;
+ u32 queue_low_water_mark;
+ u32 dev_max_queue_depth;
+
+ u32 num_fitmsg_context;
+ u32 num_req_context;
+
+ u32 timeout_slot[SKD_N_TIMEOUT_SLOT];
+ u32 timeout_stamp;
+ struct skd_fitmsg_context *skmsg_free_list;
+ struct skd_fitmsg_context *skmsg_table;
+
+ struct skd_request_context *skreq_free_list;
+ struct skd_request_context *skreq_table;
+
+ struct skd_special_context *skspcl_free_list;
+ struct skd_special_context *skspcl_table;
+
+ struct skd_special_context internal_skspcl;
+ u32 read_cap_blocksize;
+ u32 read_cap_last_lba;
+ int read_cap_is_valid;
+ int inquiry_is_valid;
+ u8 inq_serial_num[13]; /*12 chars plus null term */
+ u8 id_str[80]; /* holds a composite name (pci + sernum) */
+
+ u8 skcomp_cycle;
+ u32 skcomp_ix;
+ struct fit_completion_entry_v1 *skcomp_table;
+ struct fit_comp_error_info *skerr_table;
+ dma_addr_t cq_dma_address;
+
+ wait_queue_head_t waitq;
+
+ struct timer_list timer;
+ u32 timer_countdown;
+ u32 timer_substate;
+
+ int n_special;
+ int sgs_per_request;
+ u32 last_mtd;
+
+ u32 proto_ver;
+
+ int dbg_level;
+ u32 connect_time_stamp;
+ int connect_retries;
+#define SKD_MAX_CONNECT_RETRIES 16
+ u32 drive_jiffies;
+
+ u32 timo_slot;
+
+
+ struct work_struct completion_worker;
+
+ struct bio_list bio_queue;
+ int queue_stopped;
+
+ struct list_head flush_list;
+};
+
+#define SKD_FLUSH_JOB "skd-flush-jobs"
+struct kmem_cache *skd_flush_slab;
+
+/*
+ * These commands hold "nonzero size FLUSH bios",
+ * which are enqueud in skdev->flush_list during
+ * completion of "zero size FLUSH commands".
+ * It will be active in biomode.
+ */
+struct skd_flush_cmd {
+ void *cmd;
+ struct list_head flist;
+};
+
+#define SKD_WRITEL(DEV, VAL, OFF) skd_reg_write32(DEV, VAL, OFF)
+#define SKD_READL(DEV, OFF) skd_reg_read32(DEV, OFF)
+#define SKD_WRITEQ(DEV, VAL, OFF) skd_reg_write64(DEV, VAL, OFF)
+
+static inline u32 skd_reg_read32(struct skd_device *skdev, u32 offset)
+{
+ u32 val;
+
+ if (likely(skdev->dbg_level < 2))
+ return readl(skdev->mem_map[1] + offset);
+ else {
+ barrier();
+ val = readl(skdev->mem_map[1] + offset);
+ barrier();
+ VPRINTK(skdev, "offset %x = %x\n", offset, val);
+ return val;
+ }
+
+}
+
+static inline void skd_reg_write32(struct skd_device *skdev, u32 val,
+ u32 offset)
+{
+ if (likely(skdev->dbg_level < 2)) {
+ writel(val, skdev->mem_map[1] + offset);
+ barrier();
+ readl(skdev->mem_map[1] + offset);
+ barrier();
+ } else {
+ barrier();
+ writel(val, skdev->mem_map[1] + offset);
+ barrier();
+ readl(skdev->mem_map[1] + offset);
+ barrier();
+ VPRINTK(skdev, "offset %x = %x\n", offset, val);
+ }
+}
+
+static inline void skd_reg_write64(struct skd_device *skdev, u64 val,
+ u32 offset)
+{
+ if (likely(skdev->dbg_level < 2)) {
+ writeq(val, skdev->mem_map[1] + offset);
+ barrier();
+ readq(skdev->mem_map[1] + offset);
+ barrier();
+ } else {
+ barrier();
+ writeq(val, skdev->mem_map[1] + offset);
+ barrier();
+ readq(skdev->mem_map[1] + offset);
+ barrier();
+ VPRINTK(skdev, "offset %x = %016llx\n", offset, val);
+ }
+}
+
+
+#define SKD_IRQ_DEFAULT SKD_IRQ_MSI
+static int skd_isr_type = SKD_IRQ_DEFAULT;
+
+module_param(skd_isr_type, int, 0444);
+MODULE_PARM_DESC(skd_isr_type, "Interrupt type capability."
+ " (0==legacy, 1==MSI, 2==MSI-X, default==1)");
+
+#define SKD_MAX_REQ_PER_MSG_DEFAULT 1
+static int skd_max_req_per_msg = SKD_MAX_REQ_PER_MSG_DEFAULT;
+
+module_param(skd_max_req_per_msg, int, 0444);
+MODULE_PARM_DESC(skd_max_req_per_msg,
+ "Maximum SCSI requests packed in a single message."
+ " (1-14, default==1)");
+
+#define SKD_MAX_QUEUE_DEPTH_DEFAULT 64
+#define SKD_MAX_QUEUE_DEPTH_DEFAULT_STR "64"
+static int skd_max_queue_depth = SKD_MAX_QUEUE_DEPTH_DEFAULT;
+
+module_param(skd_max_queue_depth, int, 0444);
+MODULE_PARM_DESC(skd_max_queue_depth,
+ "Maximum SCSI requests issued to s1120."
+ " (1-200, default==" SKD_MAX_QUEUE_DEPTH_DEFAULT_STR ")");
+
+static int skd_sgs_per_request = SKD_N_SG_PER_REQ_DEFAULT;
+module_param(skd_sgs_per_request, int, 0444);
+MODULE_PARM_DESC(skd_sgs_per_request,
+ "Maximum SG elements per block request."
+ " (1-4096, default==256)");
+
+static int skd_max_pass_thru = SKD_N_SPECIAL_CONTEXT;
+module_param(skd_max_pass_thru, int, 0444);
+MODULE_PARM_DESC(skd_max_pass_thru,
+ "Maximum SCSI pass-thru at a time." " (1-50, default==32)");
+
+module_param(skd_dbg_level, int, 0444);
+MODULE_PARM_DESC(skd_dbg_level, "s1120 debug level (0,1,2)");
+
+module_param(skd_isr_comp_limit, int, 0444);
+MODULE_PARM_DESC(skd_isr_comp_limit, "s1120 isr comp limit (0=none) default=4");
+
+static int skd_bio;
+module_param(skd_bio, int, 0444);
+MODULE_PARM_DESC(skd_bio,
+ "Register as a bio device instead of block (0, 1) default=0");
+
+/* Major device number dynamically assigned. */
+static u32 skd_major;
+
+static struct skd_device *skd_construct(struct pci_dev *pdev);
+static void skd_destruct(struct skd_device *skdev);
+static const struct block_device_operations skd_blockdev_ops;
+static void skd_send_fitmsg(struct skd_device *skdev,
+ struct skd_fitmsg_context *skmsg);
+static void skd_send_special_fitmsg(struct skd_device *skdev,
+ struct skd_special_context *skspcl);
+static void skd_request_fn(struct request_queue *rq);
+static void skd_end_request(struct skd_device *skdev,
+ struct skd_request_context *skreq, int error);
+static int skd_preop_sg_list(struct skd_device *skdev,
+ struct skd_request_context *skreq);
+static void skd_postop_sg_list(struct skd_device *skdev,
+ struct skd_request_context *skreq);
+
+static void skd_restart_device(struct skd_device *skdev);
+static int skd_quiesce_dev(struct skd_device *skdev);
+static int skd_unquiesce_dev(struct skd_device *skdev);
+static void skd_release_special(struct skd_device *skdev,
+ struct skd_special_context *skspcl);
+static void skd_disable_interrupts(struct skd_device *skdev);
+static void skd_isr_fwstate(struct skd_device *skdev);
+static void skd_recover_requests(struct skd_device *skdev, int requeue);
+static void skd_soft_reset(struct skd_device *skdev);
+
+static const char *skd_name(struct skd_device *skdev);
+const char *skd_drive_state_to_str(int state);
+const char *skd_skdev_state_to_str(enum skd_drvr_state state);
+static void skd_log_skdev(struct skd_device *skdev, const char *event);
+static void skd_log_skmsg(struct skd_device *skdev,
+ struct skd_fitmsg_context *skmsg, const char *event);
+static void skd_log_skreq(struct skd_device *skdev,
+ struct skd_request_context *skreq, const char *event);
+
+/* FLUSH FUA flag handling. */
+static int skd_flush_cmd_enqueue(struct skd_device *, void *);
+static void *skd_flush_cmd_dequeue(struct skd_device *);
+
+
+/*
+ *****************************************************************************
+ * READ/WRITE REQUESTS
+ *****************************************************************************
+ */
+static void skd_stop_queue(struct skd_device *skdev)
+{
+ if (!skd_bio)
+ blk_stop_queue(skdev->queue);
+ else
+ skdev->queue_stopped = 1;
+}
+
+static void skd_unstop_queue(struct skd_device *skdev)
+{
+ if (!skd_bio)
+ queue_flag_clear(QUEUE_FLAG_STOPPED, skdev->queue);
+ else
+ skdev->queue_stopped = 0;
+}
+
+static void skd_start_queue(struct skd_device *skdev)
+{
+ if (!skd_bio) {
+ blk_start_queue(skdev->queue);
+ } else {
+ pr_err("(%s): Starting queue\n", skd_name(skdev));
+ skdev->queue_stopped = 0;
+ skd_request_fn(skdev->queue);
+ }
+}
+
+static int skd_queue_stopped(struct skd_device *skdev)
+{
+ if (!skd_bio)
+ return blk_queue_stopped(skdev->queue);
+ else
+ return skdev->queue_stopped;
+}
+
+static void skd_fail_all_pending_blk(struct skd_device *skdev)
+{
+ struct request_queue *q = skdev->queue;
+ struct request *req;
+
+ for (;; ) {
+ req = blk_peek_request(q);
+ if (req == NULL)
+ break;
+ blk_start_request(req);
+ __blk_end_request_all(req, -EIO);
+ }
+}
+
+static void skd_fail_all_pending_bio(struct skd_device *skdev)
+{
+ struct bio *bio;
+ int error = -EIO;
+
+ for (;; ) {
+ bio = bio_list_pop(&skdev->bio_queue);
+
+ if (bio == NULL)
+ break;
+
+ bio_endio(bio, error);
+ }
+}
+
+static void skd_fail_all_pending(struct skd_device *skdev)
+{
+ if (!skd_bio)
+ skd_fail_all_pending_blk(skdev);
+ else
+ skd_fail_all_pending_bio(skdev);
+}
+
+static void skd_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct skd_device *skdev = q->queuedata;
+ unsigned long flags;
+
+ spin_lock_irqsave(&skdev->lock, flags);
+
+ bio_list_add(&skdev->bio_queue, bio);
+ skd_request_fn(skdev->queue);
+
+ spin_unlock_irqrestore(&skdev->lock, flags);
+}
+
+static void
+skd_prep_rw_cdb(struct skd_scsi_request *scsi_req,
+ int data_dir, unsigned lba,
+ unsigned count)
+{
+ if (data_dir == READ)
+ scsi_req->cdb[0] = 0x28;
+ else
+ scsi_req->cdb[0] = 0x2a;
+
+ scsi_req->cdb[1] = 0;
+ scsi_req->cdb[2] = (lba & 0xff000000) >> 24;
+ scsi_req->cdb[3] = (lba & 0xff0000) >> 16;
+ scsi_req->cdb[4] = (lba & 0xff00) >> 8;
+ scsi_req->cdb[5] = (lba & 0xff);
+ scsi_req->cdb[6] = 0;
+ scsi_req->cdb[7] = (count & 0xff00) >> 8;
+ scsi_req->cdb[8] = count & 0xff;
+ scsi_req->cdb[9] = 0;
+}
+
+static void
+skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req,
+ struct skd_request_context *skreq)
+{
+ skreq->flush_cmd = 1;
+
+ scsi_req->cdb[0] = 0x35;
+ scsi_req->cdb[1] = 0;
+ scsi_req->cdb[2] = 0;
+ scsi_req->cdb[3] = 0;
+ scsi_req->cdb[4] = 0;
+ scsi_req->cdb[5] = 0;
+ scsi_req->cdb[6] = 0;
+ scsi_req->cdb[7] = 0;
+ scsi_req->cdb[8] = 0;
+ scsi_req->cdb[9] = 0;
+}
+
+static void
+skd_prep_discard_cdb(struct skd_scsi_request *scsi_req,
+ struct skd_request_context *skreq,
+ struct page *page,
+ u32 lba, u32 count)
+{
+ char *buf;
+ unsigned long len;
+ struct request *req;
+
+ buf = page_address(page);
+ len = SKD_DISCARD_CDB_LENGTH;
+
+ scsi_req->cdb[0] = UNMAP;
+ scsi_req->cdb[8] = len;
+
+ put_unaligned_be16(6 + 16, &buf[0]);
+ put_unaligned_be16(16, &buf[2]);
+ put_unaligned_be64(lba, &buf[8]);
+ put_unaligned_be32(count, &buf[16]);
+
+ if (!skd_bio) {
+ req = skreq->req;
+ blk_add_request_payload(req, page, len);
+ req->buffer = buf;
+ } else {
+ skreq->bio->bi_io_vec->bv_page = page;
+ skreq->bio->bi_io_vec->bv_offset = 0;
+ skreq->bio->bi_io_vec->bv_len = len;
+
+ skreq->bio->bi_vcnt = 1;
+ skreq->bio->bi_phys_segments = 1;
+ }
+}
+
+static void skd_request_fn_not_online(struct request_queue *q);
+
+static void skd_request_fn(struct request_queue *q)
+{
+ struct skd_device *skdev = q->queuedata;
+ struct skd_fitmsg_context *skmsg = NULL;
+ struct fit_msg_hdr *fmh = NULL;
+ struct skd_request_context *skreq;
+ struct request *req = NULL;
+ struct bio *bio = NULL;
+ struct skd_scsi_request *scsi_req;
+ struct page *page;
+ unsigned long io_flags;
+ int error;
+ u32 lba;
+ u32 count;
+ int data_dir;
+ u32 be_lba;
+ u32 be_count;
+ u64 be_dmaa;
+ u64 cmdctxt;
+ u32 timo_slot;
+ void *cmd_ptr;
+ int flush, fua;
+
+ if (skdev->state != SKD_DRVR_STATE_ONLINE) {
+ skd_request_fn_not_online(q);
+ return;
+ }
+
+ if (skd_queue_stopped(skdev)) {
+ if (skdev->skmsg_free_list == NULL ||
+ skdev->skreq_free_list == NULL ||
+ skdev->in_flight >= skdev->queue_low_water_mark)
+ /* There is still some kind of shortage */
+ return;
+
+ skd_unstop_queue(skdev);
+ }
+
+ /*
+ * Stop conditions:
+ * - There are no more native requests
+ * - There are already the maximum number of requests in progress
+ * - There are no more skd_request_context entries
+ * - There are no more FIT msg buffers
+ */
+ for (;; ) {
+
+ flush = fua = 0;
+
+ if (!skd_bio) {
+ req = blk_peek_request(q);
+
+ /* Are there any native requests to start? */
+ if (req == NULL)
+ break;
+
+ lba = (u32)blk_rq_pos(req);
+ count = blk_rq_sectors(req);
+ data_dir = rq_data_dir(req);
+ io_flags = req->cmd_flags;
+
+ if (io_flags & REQ_FLUSH)
+ flush++;
+
+ if (io_flags & REQ_FUA)
+ fua++;
+
+ VPRINTK(skdev,
+ "new req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n",
+ req, lba, lba, count, count, data_dir);
+ } else {
+ if (!list_empty(&skdev->flush_list)) {
+ /* Process data part of FLUSH request. */
+ bio = (struct bio *)skd_flush_cmd_dequeue(skdev);
+ flush++;
+ VPRINTK(skdev, "processing FLUSH request with data.\n");
+ } else {
+ /* peek at our bio queue */
+ bio = bio_list_peek(&skdev->bio_queue);
+ }
+
+ /* Are there any native requests to start? */
+ if (bio == NULL)
+ break;
+
+ lba = (u32)bio->bi_sector;
+ count = bio_sectors(bio);
+ data_dir = bio_data_dir(bio);
+ io_flags = bio->bi_rw;
+
+ VPRINTK(skdev,
+ "new bio=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n",
+ bio, lba, lba, count, count, data_dir);
+
+ if (io_flags & REQ_FLUSH)
+ flush++;
+
+ if (io_flags & REQ_FUA)
+ fua++;
+ }
+
+ /* At this point we know there is a request
+ * (from our bio q or req q depending on the way
+ * the driver is built do checks for resources.
+ */
+
+ /* Are too many requets already in progress? */
+ if (skdev->in_flight >= skdev->cur_max_queue_depth) {
+ VPRINTK(skdev, "qdepth %d, limit %d\n",
+ skdev->in_flight, skdev->cur_max_queue_depth);
+ break;
+ }
+
+ /* Is a skd_request_context available? */
+ skreq = skdev->skreq_free_list;
+ if (skreq == NULL) {
+ VPRINTK(skdev, "Out of req=%p\n", q);
+ break;
+ }
+ SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
+ SKD_ASSERT((skreq->id & SKD_ID_INCR) == 0);
+
+ /* Now we check to see if we can get a fit msg */
+ if (skmsg == NULL) {
+ if (skdev->skmsg_free_list == NULL) {
+ VPRINTK(skdev, "Out of msg\n");
+ break;
+ }
+ }
+
+ skreq->flush_cmd = 0;
+ skreq->n_sg = 0;
+ skreq->sg_byte_count = 0;
+ skreq->discard_page = 0;
+
+ /*
+ * OK to now dequeue request from either bio or q.
+ *
+ * At this point we are comitted to either start or reject
+ * the native request. Note that skd_request_context is
+ * available but is still at the head of the free list.
+ */
+ if (!skd_bio) {
+ blk_start_request(req);
+ skreq->req = req;
+ skreq->fitmsg_id = 0;
+ } else {
+ if (unlikely(flush == SKD_FLUSH_DATA_SECOND)) {
+ skreq->bio = bio;
+ } else {
+ skreq->bio = bio_list_pop(&skdev->bio_queue);
+ SKD_ASSERT(skreq->bio == bio);
+ skreq->start_time = jiffies;
+ part_inc_in_flight(&skdev->disk->part0,
+ bio_data_dir(bio));
+ }
+
+ skreq->fitmsg_id = 0;
+ }
+
+ /* Either a FIT msg is in progress or we have to start one. */
+ if (skmsg == NULL) {
+ /* Are there any FIT msg buffers available? */
+ skmsg = skdev->skmsg_free_list;
+ if (skmsg == NULL) {
+ VPRINTK(skdev, "Out of msg skdev=%p\n", skdev);
+ break;
+ }
+ SKD_ASSERT(skmsg->state == SKD_MSG_STATE_IDLE);
+ SKD_ASSERT((skmsg->id & SKD_ID_INCR) == 0);
+
+ skdev->skmsg_free_list = skmsg->next;
+
+ skmsg->state = SKD_MSG_STATE_BUSY;
+ skmsg->id += SKD_ID_INCR;
+
+ /* Initialize the FIT msg header */
+ fmh = (struct fit_msg_hdr *)skmsg->msg_buf;
+ memset(fmh, 0, sizeof(*fmh));
+ fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
+ skmsg->length = sizeof(*fmh);
+ }
+
+ skreq->fitmsg_id = skmsg->id;
+
+ /*
+ * Note that a FIT msg may have just been started
+ * but contains no SoFIT requests yet.
+ */
+
+ /*
+ * Transcode the request, checking as we go. The outcome of
+ * the transcoding is represented by the error variable.
+ */
+ cmd_ptr = &skmsg->msg_buf[skmsg->length];
+ memset(cmd_ptr, 0, 32);
+
+ be_lba = cpu_to_be32(lba);
+ be_count = cpu_to_be32(count);
+ be_dmaa = cpu_to_be64((u64)skreq->sksg_dma_address);
+ cmdctxt = skreq->id + SKD_ID_INCR;
+
+ scsi_req = cmd_ptr;
+ scsi_req->hdr.tag = cmdctxt;
+ scsi_req->hdr.sg_list_dma_address = be_dmaa;
+
+ if (data_dir == READ)
+ skreq->sg_data_dir = SKD_DATA_DIR_CARD_TO_HOST;
+ else
+ skreq->sg_data_dir = SKD_DATA_DIR_HOST_TO_CARD;
+
+ if (io_flags & REQ_DISCARD) {
+ page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+ if (!page) {
+ pr_err("request_fn:Page allocation failed.\n");
+ skd_end_request(skdev, skreq, -ENOMEM);
+ break;
+ }
+ skreq->discard_page = 1;
+ skd_prep_discard_cdb(scsi_req, skreq, page, lba, count);
+
+ } else if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) {
+ skd_prep_zerosize_flush_cdb(scsi_req, skreq);
+ SKD_ASSERT(skreq->flush_cmd == 1);
+
+ } else {
+ skd_prep_rw_cdb(scsi_req, data_dir, lba, count);
+ }
+
+ if (fua)
+ scsi_req->cdb[1] |= SKD_FUA_NV;
+
+ if ((!skd_bio && !req->bio) ||
+ (skd_bio && flush == SKD_FLUSH_ZERO_SIZE_FIRST))
+ goto skip_sg;
+
+ error = skd_preop_sg_list(skdev, skreq);
+
+ if (error != 0) {
+ /*
+ * Complete the native request with error.
+ * Note that the request context is still at the
+ * head of the free list, and that the SoFIT request
+ * was encoded into the FIT msg buffer but the FIT
+ * msg length has not been updated. In short, the
+ * only resource that has been allocated but might
+ * not be used is that the FIT msg could be empty.
+ */
+ DPRINTK(skdev, "error Out\n");
+ skd_end_request(skdev, skreq, error);
+ continue;
+ }
+
+skip_sg:
+ scsi_req->hdr.sg_list_len_bytes =
+ cpu_to_be32(skreq->sg_byte_count);
+
+ /* Complete resource allocations. */
+ skdev->skreq_free_list = skreq->next;
+ skreq->state = SKD_REQ_STATE_BUSY;
+ skreq->id += SKD_ID_INCR;
+
+ skmsg->length += sizeof(struct skd_scsi_request);
+ fmh->num_protocol_cmds_coalesced++;
+
+ /*
+ * Update the active request counts.
+ * Capture the timeout timestamp.
+ */
+ skreq->timeout_stamp = skdev->timeout_stamp;
+ timo_slot = skreq->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
+ skdev->timeout_slot[timo_slot]++;
+ skdev->in_flight++;
+ VPRINTK(skdev, "req=0x%x busy=%d\n",
+ skreq->id, skdev->in_flight);
+
+ /*
+ * If the FIT msg buffer is full send it.
+ */
+ if (skmsg->length >= SKD_N_FITMSG_BYTES ||
+ fmh->num_protocol_cmds_coalesced >= skd_max_req_per_msg) {
+ skd_send_fitmsg(skdev, skmsg);
+ skmsg = NULL;
+ fmh = NULL;
+ }
+ }
+
+ /*
+ * Is a FIT msg in progress? If it is empty put the buffer back
+ * on the free list. If it is non-empty send what we got.
+ * This minimizes latency when there are fewer requests than
+ * what fits in a FIT msg.
+ */
+ if (skmsg != NULL) {
+ /* Bigger than just a FIT msg header? */
+ if (skmsg->length > sizeof(struct fit_msg_hdr)) {
+ VPRINTK(skdev, "sending msg=%p, len %d\n",
+ skmsg, skmsg->length);
+ skd_send_fitmsg(skdev, skmsg);
+ } else {
+ /*
+ * The FIT msg is empty. It means we got started
+ * on the msg, but the requests were rejected.
+ */
+ skmsg->state = SKD_MSG_STATE_IDLE;
+ skmsg->id += SKD_ID_INCR;
+ skmsg->next = skdev->skmsg_free_list;
+ skdev->skmsg_free_list = skmsg;
+ }
+ skmsg = NULL;
+ fmh = NULL;
+ }
+
+ /*
+ * If req is non-NULL it means there is something to do but
+ * we are out of a resource.
+ */
+ if (((!skd_bio) && req) ||
+ ((skd_bio) && bio_list_peek(&skdev->bio_queue)))
+ skd_stop_queue(skdev);
+}
+
+static void skd_end_request_blk(struct skd_device *skdev,
+ struct skd_request_context *skreq, int error)
+{
+ struct request *req = skreq->req;
+ unsigned int io_flags = req->cmd_flags;
+
+ if ((io_flags & REQ_DISCARD) &&
+ (skreq->discard_page == 1)) {
+ VPRINTK(skdev, "skd_end_request_blk, free the page!");
+ free_page((unsigned long)req->buffer);
+ req->buffer = NULL;
+ }
+
+ if (unlikely(error)) {
+ struct request *req = skreq->req;
+ char *cmd = (rq_data_dir(req) == READ) ? "read" : "write";
+ u32 lba = (u32)blk_rq_pos(req);
+ u32 count = blk_rq_sectors(req);
+
+ pr_err("(%s): Error cmd=%s sect=%u count=%u id=0x%x\n",
+ skd_name(skdev), cmd, lba, count, skreq->id);
+ } else
+ VPRINTK(skdev, "id=0x%x error=%d\n", skreq->id, error);
+
+ __blk_end_request_all(skreq->req, error);
+}
+
+static int skd_preop_sg_list_blk(struct skd_device *skdev,
+ struct skd_request_context *skreq)
+{
+ struct request *req = skreq->req;
+ int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
+ int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
+ struct scatterlist *sg = &skreq->sg[0];
+ int n_sg;
+ int i;
+
+ skreq->sg_byte_count = 0;
+
+ /* SKD_ASSERT(skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD ||
+ skreq->sg_data_dir == SKD_DATA_DIR_CARD_TO_HOST); */
+
+ n_sg = blk_rq_map_sg(skdev->queue, req, sg);
+ if (n_sg <= 0)
+ return -EINVAL;
+
+ /*
+ * Map scatterlist to PCI bus addresses.
+ * Note PCI might change the number of entries.
+ */
+ n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
+ if (n_sg <= 0)
+ return -EINVAL;
+
+ SKD_ASSERT(n_sg <= skdev->sgs_per_request);
+
+ skreq->n_sg = n_sg;
+
+ for (i = 0; i < n_sg; i++) {
+ struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
+ u32 cnt = sg_dma_len(&sg[i]);
+ uint64_t dma_addr = sg_dma_address(&sg[i]);
+
+ sgd->control = FIT_SGD_CONTROL_NOT_LAST;
+ sgd->byte_count = cnt;
+ skreq->sg_byte_count += cnt;
+ sgd->host_side_addr = dma_addr;
+ sgd->dev_side_addr = 0;
+ }
+
+ skreq->sksg_list[n_sg - 1].next_desc_ptr = 0LL;
+ skreq->sksg_list[n_sg - 1].control = FIT_SGD_CONTROL_LAST;
+
+ if (unlikely(skdev->dbg_level > 1)) {
+ VPRINTK(skdev, "skreq=%x sksg_list=%p sksg_dma=%llx\n",
+ skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
+ for (i = 0; i < n_sg; i++) {
+ struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
+ VPRINTK(skdev, " sg[%d] count=%u ctrl=0x%x "
+ "addr=0x%llx next=0x%llx\n",
+ i, sgd->byte_count, sgd->control,
+ sgd->host_side_addr, sgd->next_desc_ptr);
+ }
+ }
+
+ return 0;
+}
+
+static void skd_postop_sg_list_blk(struct skd_device *skdev,
+ struct skd_request_context *skreq)
+{
+ int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
+ int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
+
+ /*
+ * restore the next ptr for next IO request so we
+ * don't have to set it every time.
+ */
+ skreq->sksg_list[skreq->n_sg - 1].next_desc_ptr =
+ skreq->sksg_dma_address +
+ ((skreq->n_sg) * sizeof(struct fit_sg_descriptor));
+ pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, pci_dir);
+}
+
+static void skd_end_request_bio(struct skd_device *skdev,
+ struct skd_request_context *skreq, int error)
+{
+ struct bio *bio = skreq->bio;
+ int rw = bio_data_dir(bio);
+ unsigned long io_flags = bio->bi_rw;
+
+ if ((io_flags & REQ_DISCARD) &&
+ (skreq->discard_page == 1)) {
+ VPRINTK(skdev, "biomode: skd_end_request: freeing DISCARD page.\n");
+ free_page((unsigned long)page_address(bio->bi_io_vec->bv_page));
+ }
+
+ if (unlikely(error)) {
+ u32 lba = (u32)skreq->bio->bi_sector;
+ u32 count = bio_sectors(skreq->bio);
+ char *cmd = (rw == WRITE) ? "write" : "read";
+ pr_err("(%s): Error cmd=%s sect=%u count=%u id=0x%x\n",
+ skd_name(skdev), cmd, lba, count, skreq->id);
+ }
+ {
+ int cpu = part_stat_lock();
+
+ if (likely(!error)) {
+ part_stat_inc(cpu, &skdev->disk->part0, ios[rw]);
+ part_stat_add(cpu, &skdev->disk->part0, sectors[rw],
+ bio_sectors(bio));
+ }
+ part_stat_add(cpu, &skdev->disk->part0, ticks[rw],
+ jiffies - skreq->start_time);
+ part_dec_in_flight(&skdev->disk->part0, rw);
+ part_stat_unlock();
+ }
+
+ VPRINTK(skdev, "id=0x%x error=%d\n", skreq->id, error);
+
+ bio_endio(skreq->bio, error);
+}
+
+static int skd_preop_sg_list_bio(struct skd_device *skdev,
+ struct skd_request_context *skreq)
+{