1 files changed, 1080 insertions, 254 deletions
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 24778ba4b6e8..29650ba669da 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -47,6 +47,7 @@
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
 #include <linux/poll.h>
+#include <linux/vmalloc.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -181,32 +182,44 @@ inline void megasas_return_cmd_fusion(struct megasas_instance *instance,
 	struct megasas_cmd_fusion *cmd)
 {
 	cmd->scmd = NULL;
-	memset(cmd->io_request, 0, sizeof(struct MPI2_RAID_SCSI_IO_REQUEST));
+	memset(cmd->io_request, 0, MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE);
+	cmd->r1_alt_dev_handle = MR_DEVHANDLE_INVALID;
+	cmd->cmd_completed = false;
 }
 
 /**
  * megasas_fire_cmd_fusion -	Sends command to the FW
+ * @instance:			Adapter soft state
+ * @req_desc:			32bit or 64bit Request descriptor
+ *
+ * Perform PCI Write. Ventura supports 32 bit Descriptor.
+ * Prior to Ventura (12G) MR controller supports 64 bit Descriptor.
  */
+
 static void
 megasas_fire_cmd_fusion(struct megasas_instance *instance,
 		union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc)
 {
+	if (instance->is_ventura)
+		writel(le32_to_cpu(req_desc->u.low),
+			&instance->reg_set->inbound_single_queue_port);
+	else {
 #if defined(writeq) && defined(CONFIG_64BIT)
-	u64 req_data = (((u64)le32_to_cpu(req_desc->u.high) << 32) |
-			le32_to_cpu(req_desc->u.low));
+		u64 req_data = (((u64)le32_to_cpu(req_desc->u.high) << 32) |
+				le32_to_cpu(req_desc->u.low));
 
-	writeq(req_data, &instance->reg_set->inbound_low_queue_port);
+		writeq(req_data, &instance->reg_set->inbound_low_queue_port);
 #else
-	unsigned long flags;
-
-	spin_lock_irqsave(&instance->hba_lock, flags);
-	writel(le32_to_cpu(req_desc->u.low),
-		&instance->reg_set->inbound_low_queue_port);
-	writel(le32_to_cpu(req_desc->u.high),
-		&instance->reg_set->inbound_high_queue_port);
-	mmiowb();
-	spin_unlock_irqrestore(&instance->hba_lock, flags);
+		unsigned long flags;
+		spin_lock_irqsave(&instance->hba_lock, flags);
+		writel(le32_to_cpu(req_desc->u.low),
+			&instance->reg_set->inbound_low_queue_port);
+		writel(le32_to_cpu(req_desc->u.high),
+			&instance->reg_set->inbound_high_queue_port);
+		mmiowb();
+		spin_unlock_irqrestore(&instance->hba_lock, flags);
 #endif
+	}
 }
 
 /**
@@ -229,7 +242,10 @@ megasas_fusion_update_can_queue(struct megasas_instance *instance, int fw_boot_c
 
 	reg_set = instance->reg_set;
 
-	cur_max_fw_cmds = readl(&instance->reg_set->outbound_scratch_pad_3) & 0x00FFFF;
+	/* ventura FW does not fill outbound_scratch_pad_3 with queue depth */
+	if (!instance->is_ventura)
+		cur_max_fw_cmds =
+		readl(&instance->reg_set->outbound_scratch_pad_3) & 0x00FFFF;
 
 	if (dual_qdepth_disable || !cur_max_fw_cmds)
 		cur_max_fw_cmds = instance->instancet->read_fw_status_reg(reg_set) & 0x00FFFF;
@@ -243,7 +259,7 @@ megasas_fusion_update_can_queue(struct megasas_instance *instance, int fw_boot_c
 
 	if (fw_boot_context == OCR_CONTEXT) {
 		cur_max_fw_cmds = cur_max_fw_cmds - 1;
-		if (cur_max_fw_cmds <= instance->max_fw_cmds) {
+		if (cur_max_fw_cmds < instance->max_fw_cmds) {
 			instance->cur_can_queue =
 				cur_max_fw_cmds - (MEGASAS_FUSION_INTERNAL_CMDS +
 						MEGASAS_FUSION_IOCTL_CMDS);
@@ -255,7 +271,8 @@ megasas_fusion_update_can_queue(struct megasas_instance *instance, int fw_boot_c
 		instance->ldio_threshold = ldio_threshold;
 
 		if (!instance->is_rdpq)
-			instance->max_fw_cmds = min_t(u16, instance->max_fw_cmds, 1024);
+			instance->max_fw_cmds =
+				min_t(u16, instance->max_fw_cmds, 1024);
 
 		if (reset_devices)
 			instance->max_fw_cmds = min(instance->max_fw_cmds,
@@ -271,7 +288,14 @@ megasas_fusion_update_can_queue(struct megasas_instance *instance, int fw_boot_c
 				(MEGASAS_FUSION_INTERNAL_CMDS +
 				MEGASAS_FUSION_IOCTL_CMDS);
 		instance->cur_can_queue = instance->max_scsi_cmds;
+		instance->host->can_queue = instance->cur_can_queue;
 	}
+
+	if (instance->is_ventura)
+		instance->max_mpt_cmds =
+		instance->max_fw_cmds * RAID_1_PEER_CMDS;
+	else
+		instance->max_mpt_cmds = instance->max_fw_cmds;
 }
 /**
  * megasas_free_cmds_fusion -	Free all the cmds in the free cmd pool
@@ -285,7 +309,7 @@ megasas_free_cmds_fusion(struct megasas_instance *instance)
 	struct megasas_cmd_fusion *cmd;
 
 	/* SG, Sense */
-	for (i = 0; i < instance->max_fw_cmds; i++) {
+	for (i = 0; i < instance->max_mpt_cmds; i++) {
 		cmd = fusion->cmd_list[i];
 		if (cmd) {
 			if (cmd->sg_frame)
@@ -329,7 +353,7 @@ megasas_free_cmds_fusion(struct megasas_instance *instance)
 
 
 	/* cmd_list */
-	for (i = 0; i < instance->max_fw_cmds; i++)
+	for (i = 0; i < instance->max_mpt_cmds; i++)
 		kfree(fusion->cmd_list[i]);
 
 	kfree(fusion->cmd_list);
@@ -343,7 +367,7 @@ megasas_free_cmds_fusion(struct megasas_instance *instance)
 static int megasas_create_sg_sense_fusion(struct megasas_instance *instance)
 {
 	int i;
-	u32 max_cmd;
+	u16 max_cmd;
 	struct fusion_context *fusion;
 	struct megasas_cmd_fusion *cmd;
 
@@ -353,7 +377,8 @@ static int megasas_create_sg_sense_fusion(struct megasas_instance *instance)
 
 	fusion->sg_dma_pool =
 			pci_pool_create("mr_sg", instance->pdev,
-				instance->max_chain_frame_sz, 4, 0);
+				instance->max_chain_frame_sz,
+				MR_DEFAULT_NVME_PAGE_SIZE, 0);
 	/* SCSI_SENSE_BUFFERSIZE  = 96 bytes */
 	fusion->sense_dma_pool =
 			pci_pool_create("mr_sense", instance->pdev,
@@ -381,33 +406,47 @@ static int megasas_create_sg_sense_fusion(struct megasas_instance *instance)
 			return -ENOMEM;
 		}
 	}
+
+	/* create sense buffer for the raid 1/10 fp */
+	for (i = max_cmd; i < instance->max_mpt_cmds; i++) {
+		cmd = fusion->cmd_list[i];
+		cmd->sense = pci_pool_alloc(fusion->sense_dma_pool,
+			GFP_KERNEL, &cmd->sense_phys_addr);
+		if (!cmd->sense) {
+			dev_err(&instance->pdev->dev,
+				"Failed from %s %d\n",  __func__, __LINE__);
+			return -ENOMEM;
+		}
+	}
+
 	return 0;
 }
 
 int
 megasas_alloc_cmdlist_fusion(struct megasas_instance *instance)
 {
-	u32 max_cmd, i;
+	u32 max_mpt_cmd, i;
 	struct fusion_context *fusion;
 
 	fusion = instance->ctrl_context;
 
-	max_cmd = instance->max_fw_cmds;
+	max_mpt_cmd = instance->max_mpt_cmds;
 
 	/*
 	 * fusion->cmd_list is an array of struct megasas_cmd_fusion pointers.
 	 * Allocate the dynamic array first and then allocate individual
 	 * commands.
 	 */
-	fusion->cmd_list = kzalloc(sizeof(struct megasas_cmd_fusion *) * max_cmd,
-						GFP_KERNEL);
+	fusion->cmd_list =
+		kzalloc(sizeof(struct megasas_cmd_fusion *) * max_mpt_cmd,
+			GFP_KERNEL);
 	if (!fusion->cmd_list) {
 		dev_err(&instance->pdev->dev,
 			"Failed from %s %d\n",  __func__, __LINE__);
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < max_cmd; i++) {
+	for (i = 0; i < max_mpt_cmd; i++) {
 		fusion->cmd_list[i] = kzalloc(sizeof(struct megasas_cmd_fusion),
 					      GFP_KERNEL);
 		if (!fusion->cmd_list[i]) {
@@ -539,7 +578,7 @@ megasas_alloc_rdpq_fusion(struct megasas_instance *instance)
 		}
 
 		fusion->rdpq_virt[i].RDPQBaseAddress =
-			fusion->reply_frames_desc_phys[i];
+			cpu_to_le64(fusion->reply_frames_desc_phys[i]);
 
 		reply_desc = fusion->reply_frames_desc[i];
 		for (j = 0; j < fusion->reply_q_depth; j++, reply_desc++)
@@ -642,13 +681,14 @@ megasas_alloc_cmds_fusion(struct megasas_instance *instance)
 	 */
 
 	/* SMID 0 is reserved. Set SMID/index from 1 */
-	for (i = 0; i < instance->max_fw_cmds; i++) {
+	for (i = 0; i < instance->max_mpt_cmds; i++) {
 		cmd = fusion->cmd_list[i];
 		offset = MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE * i;
 		memset(cmd, 0, sizeof(struct megasas_cmd_fusion));
 		cmd->index = i + 1;
 		cmd->scmd = NULL;
-		cmd->sync_cmd_idx = (i >= instance->max_scsi_cmds) ?
+		cmd->sync_cmd_idx =
+		(i >= instance->max_scsi_cmds && i < instance->max_fw_cmds) ?
 				(i - instance->max_scsi_cmds) :
 				(u32)ULONG_MAX; /* Set to Invalid */
 		cmd->instance = instance;
@@ -658,6 +698,7 @@ megasas_alloc_cmds_fusion(struct megasas_instance *instance)
 		memset(cmd->io_request, 0,
 		       sizeof(struct MPI2_RAID_SCSI_IO_REQUEST));
 		cmd->io_request_phys_addr = io_req_base_phys + offset;
+		cmd->r1_alt_dev_handle = MR_DEVHANDLE_INVALID;
 	}
 
 	if (megasas_create_sg_sense_fusion(instance))
@@ -725,6 +766,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
 	const char *sys_info;
 	MFI_CAPABILITIES *drv_ops;
 	u32 scratch_pad_2;
+	unsigned long flags;
 
 	fusion = instance->ctrl_context;
 
@@ -781,6 +823,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
 			MPI2_IOCINIT_MSGFLAG_RDPQ_ARRAY_MODE : 0;
 	IOCInitMessage->SystemRequestFrameBaseAddress = cpu_to_le64(fusion->io_request_frames_phys);
 	IOCInitMessage->HostMSIxVectors = instance->msix_vectors;
+	IOCInitMessage->HostPageSize = MR_DEFAULT_NVME_PAGE_SHIFT;
 	init_frame = (struct megasas_init_frame *)cmd->frame;
 	memset(init_frame, 0, MEGAMFI_FRAME_SIZE);
 
@@ -796,7 +839,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
 	drv_ops = (MFI_CAPABILITIES *) &(init_frame->driver_operations);
 
 	/* driver support Extended MSIX */
-	if (fusion->adapter_type == INVADER_SERIES)
+	if (fusion->adapter_type >= INVADER_SERIES)
 		drv_ops->mfi_capabilities.support_additional_msix = 1;
 	/* driver supports HA / Remote LUN over Fast Path interface */
 	drv_ops->mfi_capabilities.support_fp_remote_lun = 1;
@@ -813,6 +856,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
 		drv_ops->mfi_capabilities.support_ext_queue_depth = 1;
 
 	drv_ops->mfi_capabilities.support_qd_throttling = 1;
+	drv_ops->mfi_capabilities.support_pd_map_target_id = 1;
 	/* Convert capability to LE32 */
 	cpu_to_le32s((u32 *)&init_frame->driver_operations.mfi_capabilities);
 
@@ -850,7 +894,14 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
 			break;
 	}
 
-	megasas_fire_cmd_fusion(instance, &req_desc);
+	/* For Ventura also IOC INIT required 64 bit Descriptor write. */
+	spin_lock_irqsave(&instance->hba_lock, flags);
+	writel(le32_to_cpu(req_desc.u.low),
+	       &instance->reg_set->inbound_low_queue_port);
+	writel(le32_to_cpu(req_desc.u.high),
+	       &instance->reg_set->inbound_high_queue_port);
+	mmiowb();
+	spin_unlock_irqrestore(&instance->hba_lock, flags);
 
 	wait_and_poll(instance, cmd, MFI_POLL_TIMEOUT_SECS);
 
@@ -1009,11 +1060,6 @@ megasas_get_ld_map_info(struct megasas_instance *instance)
 
 	memset(ci, 0, fusion->max_map_sz);
 	memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
-#if VD_EXT_DEBUG
-	dev_dbg(&instance->pdev->dev,
-		"%s sending MR_DCMD_LD_MAP_GET_INFO with size %d\n",
-		__func__, cpu_to_le32(size_map_info));
-#endif
 	dcmd->cmd = MFI_CMD_DCMD;
 	dcmd->cmd_status = 0xFF;
 	dcmd->sge_count = 1;
@@ -1065,10 +1111,11 @@ megasas_get_map_info(struct megasas_instance *instance)
 int
 megasas_sync_map_info(struct megasas_instance *instance)
 {
-	int ret = 0, i;
+	int i;
 	struct megasas_cmd *cmd;
 	struct megasas_dcmd_frame *dcmd;
-	u32 size_sync_info, num_lds;
+	u16 num_lds;
+	u32 size_sync_info;
 	struct fusion_context *fusion;
 	struct MR_LD_TARGET_SYNC *ci = NULL;
 	struct MR_DRV_RAID_MAP_ALL *map;
@@ -1134,7 +1181,7 @@ megasas_sync_map_info(struct megasas_instance *instance)
 
 	instance->instancet->issue_dcmd(instance, cmd);
 
-	return ret;
+	return 0;
 }
 
 /*
@@ -1220,7 +1267,8 @@ megasas_init_adapter_fusion(struct megasas_instance *instance)
 {
 	struct megasas_register_set __iomem *reg_set;
 	struct fusion_context *fusion;
-	u32 max_cmd, scratch_pad_2;
+	u16 max_cmd;
+	u32 scratch_pad_2;
 	int i = 0, count;
 
 	fusion = instance->ctrl_context;
@@ -1230,13 +1278,6 @@ megasas_init_adapter_fusion(struct megasas_instance *instance)
 	megasas_fusion_update_can_queue(instance, PROBE_CONTEXT);
 
 	/*
-	 * Reduce the max supported cmds by 1. This is to ensure that the
-	 * reply_q_sz (1 more than the max cmd that driver may send)
-	 * does not exceed max cmds that the FW can support
-	 */
-	instance->max_fw_cmds = instance->max_fw_cmds-1;
-
-	/*
 	 * Only Driver's internal DCMDs and IOCTL DCMDs needs to have MFI frames
 	 */
 	instance->max_mfi_cmds =
@@ -1247,12 +1288,12 @@ megasas_init_adapter_fusion(struct megasas_instance *instance)
 	fusion->reply_q_depth = 2 * (((max_cmd + 1 + 15)/16)*16);
 
 	fusion->request_alloc_sz =
-		sizeof(union MEGASAS_REQUEST_DESCRIPTOR_UNION) *max_cmd;
+	sizeof(union MEGASAS_REQUEST_DESCRIPTOR_UNION) * instance->max_mpt_cmds;
 	fusion->reply_alloc_sz = sizeof(union MPI2_REPLY_DESCRIPTORS_UNION)
 		*(fusion->reply_q_depth);
 	fusion->io_frames_alloc_sz = MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE +
-		(MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE *
-		 (max_cmd + 1)); /* Extra 1 for SMID 0 */
+		(MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE
+		* (instance->max_mpt_cmds + 1)); /* Extra 1 for SMID 0 */
 
 	scratch_pad_2 = readl(&instance->reg_set->outbound_scratch_pad_2);
 	/* If scratch_pad_2 & MEGASAS_MAX_CHAIN_SIZE_UNITS_MASK is set,
@@ -1302,7 +1343,7 @@ megasas_init_adapter_fusion(struct megasas_instance *instance)
 		fusion->last_reply_idx[i] = 0;
 
 	/*
-	 * For fusion adapters, 3 commands for IOCTL and 5 commands
+	 * For fusion adapters, 3 commands for IOCTL and 8 commands
 	 * for driver's internal DCMDs.
 	 */
 	instance->max_scsi_cmds = instance->max_fw_cmds -
@@ -1331,6 +1372,7 @@ megasas_init_adapter_fusion(struct megasas_instance *instance)
 	}
 
 	instance->flag_ieee = 1;
+	instance->r1_ldio_hint_default =  MR_R1_LDIO_PIGGYBACK_DEFAULT;
 	fusion->fast_path_io = 0;
 
 	fusion->drv_map_pages = get_order(fusion->drv_map_sz);
@@ -1388,96 +1430,348 @@ fail_alloc_mfi_cmds:
  */
 
 void
-map_cmd_status(struct megasas_cmd_fusion *cmd, u8 status, u8 ext_status)
+map_cmd_status(struct fusion_context *fusion,
+		struct scsi_cmnd *scmd, u8 status, u8 ext_status,
+		u32 data_length, u8 *sense)
 {
+	u8 cmd_type;
+	int resid;
 
+	cmd_type = megasas_cmd_type(scmd);
 	switch (status) {
 
 	case MFI_STAT_OK:
-		cmd->scmd->result = DID_OK << 16;
+		scmd->result = DID_OK << 16;
 		break;
 
 	case MFI_STAT_SCSI_IO_FAILED:
 	case MFI_STAT_LD_INIT_IN_PROGRESS:
-		cmd->scmd->result = (DID_ERROR << 16) | ext_status;
+		scmd->result = (DID_ERROR << 16) | ext_status;
 		break;
 
 	case MFI_STAT_SCSI_DONE_WITH_ERROR:
 
-		cmd->scmd->result = (DID_OK << 16) | ext_status;
+		scmd->result = (DID_OK << 16) | ext_status;
 		if (ext_status == SAM_STAT_CHECK_CONDITION) {
-			memset(cmd->scmd->sense_buffer, 0,
+			memset(scmd->sense_buffer, 0,
 			       SCSI_SENSE_BUFFERSIZE);
-			memcpy(cmd->scmd->sense_buffer, cmd->sense,
+			memcpy(scmd->sense_buffer, sense,
 			       SCSI_SENSE_BUFFERSIZE);
-			cmd->scmd->result |= DRIVER_SENSE << 24;
+			scmd->result |= DRIVER_SENSE << 24;
 		}
+
+		/*
+		 * If the  IO request is partially completed, then MR FW will
+		 * update "io_request->DataLength" field with actual number of
+		 * bytes transferred.Driver will set residual bytes count in
+		 * SCSI command structure.
+		 */
+		resid = (scsi_bufflen(scmd) - data_length);
+		scsi_set_resid(scmd, resid);
+
+		if (resid &&
+			((cmd_type == READ_WRITE_LDIO) ||
+			(cmd_type == READ_WRITE_SYSPDIO)))
+			scmd_printk(KERN_INFO, scmd, "BRCM Debug mfi stat 0x%x, data len"
+				" requested/completed 0x%x/0x%x\n",
+				status, scsi_bufflen(scmd), data_length);
 		break;
 
 	case MFI_STAT_LD_OFFLINE:
 	case MFI_STAT_DEVICE_NOT_FOUND:
-		cmd->scmd->result = DID_BAD_TARGET << 16;
+		scmd->result = DID_BAD_TARGET << 16;
 		break;
 	case MFI_STAT_CONFIG_SEQ_MISMATCH:
-		cmd->scmd->result = DID_IMM_RETRY << 16;
+		scmd->result = DID_IMM_RETRY << 16;
 		break;
 	default:
-		dev_printk(KERN_DEBUG, &cmd->instance->pdev->dev, "FW status %#x\n", status);
-		cmd->scmd->result = DID_ERROR << 16;
+		scmd->result = DID_ERROR << 16;
 		break;
 	}
 }
 
 /**
+ * megasas_is_prp_possible -
+ * Checks if native NVMe PRPs can be built for the IO
+ *
+ * @instance:		Adapter soft state
+ * @scmd:		SCSI command from the mid-layer
+ * @sge_count:		scatter gather element count.
+ *
+ * Returns:		true: PRPs can be built
+ *			false: IEEE SGLs needs to be built
+ */
+static bool
+megasas_is_prp_possible(struct megasas_instance *instance,
+			struct scsi_cmnd *scmd, int sge_count)
+{
+	struct fusion_context *fusion;
+	int i;
+	u32 data_length = 0;
+	struct scatterlist *sg_scmd;
+	bool build_prp = false;
+	u32 mr_nvme_pg_size;
+
+	mr_nvme_pg_size = max_t(u32, instance->nvme_page_size,
+				MR_DEFAULT_NVME_PAGE_SIZE);
+	fusion = instance->ctrl_context;
+	data_length = scsi_bufflen(scmd);
+	sg_scmd = scsi_sglist(scmd);
+
+	/*
+	 * NVMe uses one PRP for each page (or part of a page)
+	 * look at the data length - if 4 pages or less then IEEE is OK
+	 * if  > 5 pages then we need to build a native SGL
+	 * if > 4 and <= 5 pages, then check physical address of 1st SG entry
+	 * if this first size in the page is >= the residual beyond 4 pages
+	 * then use IEEE, otherwise use native SGL
+	 */
+
+	if (data_length > (mr_nvme_pg_size * 5)) {
+		build_prp = true;
+	} else if ((data_length > (mr_nvme_pg_size * 4)) &&
+			(data_length <= (mr_nvme_pg_size * 5)))  {
+		/* check if 1st SG entry size is < residual beyond 4 pages */
+		if (sg_dma_len(sg_scmd) < (data_length - (mr_nvme_pg_size * 4)))
+			build_prp = true;
+	}
+
+/*
+ * Below code detects gaps/holes in IO data buffers.
+ * What does holes/gaps mean?
+ * Any SGE except first one in a SGL starts at non NVME page size
+ * aligned address OR Any SGE except last one in a SGL ends at
+ * non NVME page size boundary.
+ *
+ * Driver has already informed block layer by setting boundary rules for
+ * bio merging done at NVME page size boundary calling kernel API
+ * blk_queue_virt_boundary inside slave_config.
+ * Still there is possibility of IO coming with holes to driver because of
+ * IO merging done by IO scheduler.
+ *
+ * With SCSI BLK MQ enabled, there will be no IO with holes as there is no
+ * IO scheduling so no IO merging.
+ *
+ * With SCSI BLK MQ disabled, IO scheduler may attempt to merge IOs and
+ * then sending IOs with holes.
+ *
+ * Though driver can request block layer to disable IO merging by calling-
+ * queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, sdev->request_queue) but
+ * user may tune sysfs parameter- nomerges again to 0 or 1.
+ *
+ * If in future IO scheduling is enabled with SCSI BLK MQ,
+ * this algorithm to detect holes will be required in driver
+ * for SCSI BLK MQ enabled case as well.
+ *
+ *
+ */
+	scsi_for_each_sg(scmd, sg_scmd, sge_count, i) {
+		if ((i != 0) && (i != (sge_count - 1))) {
+			if (mega_mod64(sg_dma_len(sg_scmd), mr_nvme_pg_size) ||
+			    mega_mod64(sg_dma_address(sg_scmd),
+				       mr_nvme_pg_size)) {
+				build_prp = false;
+				atomic_inc(&instance->sge_holes_type1);
+				break;
+			}
+		}
+
+		if ((sge_count > 1) && (i == 0)) {
+			if ((mega_mod64((sg_dma_address(sg_scmd) +
+					sg_dma_len(sg_scmd)),
+					mr_nvme_pg_size))) {
+				build_prp = false;
+				atomic_inc(&instance->sge_holes_type2);
+				break;
+			}
+		}
+
+		if ((sge_count > 1) && (i == (sge_count - 1))) {
+			if (mega_mod64(sg_dma_address(sg_scmd),
+				       mr_nvme_pg_size)) {
+				build_prp = false;
+				atomic_inc(&instance->sge_holes_type3);
+				break;
+			}
+		}
+	}
+
+	return build_prp;
+}
+
+/**
+ * megasas_make_prp_nvme -
+ * Prepare PRPs(Physical Region Page)- SGLs specific to NVMe drives only
+ *
+ * @instance:		Adapter soft state
+ * @scmd:		SCSI command from the mid-layer
+ * @sgl_ptr:		SGL to be filled in
+ * @cmd:		Fusion command frame
+ * @sge_count:		scatter gather element count.
+ *
+ * Returns:		true: PRPs are built
+ *			false: IEEE SGLs needs to be built
+ */
+static bool
+megasas_make_prp_nvme(struct megasas_instance *instance, struct scsi_cmnd *scmd,
+		      struct MPI25_IEEE_SGE_CHAIN64 *sgl_ptr,
+		      struct megasas_cmd_fusion *cmd, int sge_count)
+{
+	int sge_len, offset, num_prp_in_chain = 0;
+	struct MPI25_IEEE_SGE_CHAIN64 *main_chain_element, *ptr_first_sgl;
+	u64 *ptr_sgl;
+	dma_addr_t ptr_sgl_phys;
+	u64 sge_addr;
+	u32 page_mask, page_mask_result;
+	struct scatterlist *sg_scmd;
+	u32 first_prp_len;
+	bool build_prp = false;
+	int data_len = scsi_bufflen(scmd);
+	struct fusion_context *fusion;
+	u32 mr_nvme_pg_size = max_t(u32, instance->nvme_page_size,
+					MR_DEFAULT_NVME_PAGE_SIZE);
+
+	fusion = instance->ctrl_context;
+
+	build_prp = megasas_is_prp_possible(instance, scmd, sge_count);
+
+	if (!build_prp)
+		return false;
+
+	/*
+	 * Nvme has a very convoluted prp format.  One prp is required
+	 * for each page or partial page. Driver need to split up OS sg_list
+	 * entries if it is longer than one page or cross a page
+	 * boundary.  Driver also have to insert a PRP list pointer entry as
+	 * the last entry in each physical page of the PRP list.
+	 *
+	 * NOTE: The first PRP "entry" is actually placed in the first
+	 * SGL entry in the main message as IEEE 64 format.  The 2nd
+	 * entry in the main message is the chain element, and the rest
+	 * of the PRP entries are built in the contiguous pcie buffer.
+	 */
+	page_mask = mr_nvme_pg_size - 1;
+	ptr_sgl = (u64 *)cmd->sg_frame;
+	ptr_sgl_phys = cmd->sg_frame_phys_addr;
+	memset(ptr_sgl, 0, instance->max_chain_frame_sz);
+
+	/* Build chain frame element which holds all prps except first*/
+	main_chain_element = (struct MPI25_IEEE_SGE_CHAIN64 *)
+	    ((u8 *)sgl_ptr + sizeof(struct MPI25_IEEE_SGE_CHAIN64));
+
+	main_chain_element->Address = cpu_to_le64(ptr_sgl_phys);
+	main_chain_element->NextChainOffset = 0;
+	main_chain_element->Flags = IEEE_SGE_FLAGS_CHAIN_ELEMENT |
+					IEEE_SGE_FLAGS_SYSTEM_ADDR |
+					MPI26_IEEE_SGE_FLAGS_NSF_NVME_PRP;
+
+	/* Build first prp, sge need not to be page aligned*/
+	ptr_first_sgl = sgl_ptr;
+	sg_scmd = scsi_sglist(scmd);
+	sge_addr = sg_dma_address(sg_scmd);
+	sge_len = sg_dma_len(sg_scmd);
+
+	offset = (u32)(sge_addr & page_mask);
+	first_prp_len = mr_nvme_pg_size - offset;
+
+	ptr_first_sgl->Address = cpu_to_le64(sge_addr);
+	ptr_first_sgl->Length = cpu_to_le32(first_prp_len);
+
+	data_len -= first_prp_len;
+
+	if (sge_len > first_prp_len) {
+		sge_addr += first_prp_len;
+		sge_len -= first_prp_len;
+	} else if (sge_len == first_prp_len) {
+		sg_scmd = sg_next(sg_scmd);
+		sge_addr = sg_dma_address(sg_scmd);
+		sge_len = sg_dma_len(sg_scmd);
+	}
+
+	for (;;) {
+		offset = (u32)(sge_addr & page_mask);
+
+		/* Put PRP pointer due to page boundary*/
+		page_mask_result = (uintptr_t)(ptr_sgl + 1) & page_mask;
+		if (unlikely(!page_mask_result)) {
+			scmd_printk(KERN_NOTICE,
+				    scmd, "page boundary ptr_sgl: 0x%p\n",
+				    ptr_sgl);
+			ptr_sgl_phys += 8;
+			*ptr_sgl = cpu_to_le64(ptr_sgl_phys);
+			ptr_sgl++;
+			num_prp_in_chain++;
+		}
+
+		*ptr_sgl = cpu_to_le64(sge_addr);
+		ptr_sgl++;
+		ptr_sgl_phys += 8;
+		num_prp_in_chain++;
+
+		sge_addr += mr_nvme_pg_size;
+		sge_len -= mr_nvme_pg_size;
+		data_len -= mr_nvme_pg_size;
+
+		if (data_len <= 0)
+			break;
+
+		if (sge_len > 0)
+			continue;
+
+		sg_scmd = sg_next(sg_scmd);
+		sge_addr = sg_dma_address(sg_scmd);
+		sge_len = sg_dma_len(sg_scmd);
+	}
+
+	main_chain_element->Length =
+			cpu_to_le32(num_prp_in_chain * sizeof(u64));
+
+	atomic_inc(&instance->prp_sgl);
+	return build_prp;
+}
+
+/**
  * megasas_make_sgl_fusion -	Prepares 32-bit SGL
  * @instance:		Adapter soft state
  * @scp:		SCSI command from the mid-layer
  * @sgl_ptr:		SGL to be filled in
  * @cmd:		cmd we are working on
+ * @sge_count		sge count
  *
- * If successful, this function returns the number of SG elements.
  */
-static int
+static void
 megasas_make_sgl_fusion(struct megasas_instance *instance,
 			struct scsi_cmnd *scp,
 			struct MPI25_IEEE_SGE_CHAIN64 *sgl_ptr,
-			struct megasas_cmd_fusion *cmd)
+			struct megasas_cmd_fusion *cmd, int sge_count)
 {
-	int i, sg_processed, sge_count;
+	int i, sg_processed;
 	struct scatterlist *os_sgl;
 	struct fusion_context *fusion;
 
 	fusion = instance->ctrl_context;
 
-	if (fusion->adapter_type == INVADER_SERIES) {
+	if (fusion->adapter_type >= INVADER_SERIES) {
 		struct MPI25_IEEE_SGE_CHAIN64 *sgl_ptr_end = sgl_ptr;
 		sgl_ptr_end += fusion->max_sge_in_main_msg - 1;
 		sgl_ptr_end->Flags = 0;
 	}
 
-	sge_count = scsi_dma_map(scp);
-
-	BUG_ON(sge_count < 0);
-
-	if (sge_count > instance->max_num_sge || !sge_count)
-		return sge_count;
-
 	scsi_for_each_sg(scp, os_sgl, sge_count, i) {
 		sgl_ptr->Length = cpu_to_le32(sg_dma_len(os_sgl));
 		sgl_ptr->Address = cpu_to_le64(sg_dma_address(os_sgl));
 		sgl_ptr->Flags = 0;
-		if (fusion->adapter_type == INVADER_SERIES)
+		if (fusion->adapter_type >= INVADER_SERIES)
 			if (i == sge_count - 1)
 				sgl_ptr->Flags = IEEE_SGE_FLAGS_END_OF_LIST;
 		sgl_ptr++;
-
 		sg_processed = i + 1;
 
 		if ((sg_processed ==  (fusion->max_sge_in_main_msg - 1)) &&
 		    (sge_count > fusion->max_sge_in_main_msg)) {
 
 			struct MPI25_IEEE_SGE_CHAIN64 *sg_chain;
-			if (fusion->adapter_type == INVADER_SERIES) {
+			if (fusion->adapter_type >= INVADER_SERIES) {
 				if ((le16_to_cpu(cmd->io_request->IoFlags) &
 					MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) !=
 					MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH)
@@ -1493,7 +1787,7 @@ megasas_make_sgl_fusion(struct megasas_instance *instance,
 			sg_chain = sgl_ptr;
 			/* Prepare chain element */
 			sg_chain->NextChainOffset = 0;
-			if (fusion->adapter_type == INVADER_SERIES)
+			if (fusion->adapter_type >= INVADER_SERIES)
 				sg_chain->Flags = IEEE_SGE_FLAGS_CHAIN_ELEMENT;
 			else
 				sg_chain->Flags =
@@ -1507,6 +1801,45 @@ megasas_make_sgl_fusion(struct megasas_instance *instance,
 			memset(sgl_ptr, 0, instance->max_chain_frame_sz);
 		}
 	}
+	atomic_inc(&instance->ieee_sgl);
+}
+
+/**
+ * megasas_make_sgl -	Build Scatter Gather List(SGLs)
+ * @scp:		SCSI command pointer
+ * @instance:		Soft instance of controller
+ * @cmd:		Fusion command pointer
+ *
+ * This function will build sgls based on device type.
+ * For nvme drives, there is different way of building sgls in nvme native
+ * format- PRPs(Physical Region Page).
+ *
+ * Returns the number of sg lists actually used, zero if the sg lists
+ * is NULL, or -ENOMEM if the mapping failed
+ */
+static
+int megasas_make_sgl(struct megasas_instance *instance, struct scsi_cmnd *scp,
+		     struct megasas_cmd_fusion *cmd)
+{
+	int sge_count;
+	bool build_prp = false;
+	struct MPI25_IEEE_SGE_CHAIN64 *sgl_chain64;
+
+	sge_count = scsi_dma_map(scp);
+
+	if ((sge_count > instance->max_num_sge) || (sge_count <= 0))
+		return sge_count;
+
+	sgl_chain64 = (struct MPI25_IEEE_SGE_CHAIN64 *)&cmd->io_request->SGL;
+	if ((le16_to_cpu(cmd->io_request->IoFlags) &
+	    MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) &&
+	    (cmd->pd_interface == NVME_PD))
+		build_prp = megasas_make_prp_nvme(instance, scp, sgl_chain64,
+						  cmd, sge_count);
+
+	if (!build_prp)
+		megasas_make_sgl_fusion(instance, scp, sgl_chain64,
+					cmd, sge_count);
 
 	return sge_count;
 }
@@ -1525,7 +1858,7 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len,
 		   struct MR_DRV_RAID_MAP_ALL *local_map_ptr, u32 ref_tag)
 {
 	struct MR_LD_RAID *raid;
-	u32 ld;
+	u16 ld;
 	u64 start_blk = io_info->pdBlock;
 	u8 *cdb = io_request->CDB.CDB32;
 	u32 num_blocks = io_info->numBlocks;
@@ -1574,6 +1907,7 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len,
 				MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG |
 				MPI2_SCSIIO_EEDPFLAGS_CHECK_REMOVE_OP |
 				MPI2_SCSIIO_EEDPFLAGS_CHECK_APPTAG |
+				MPI25_SCSIIO_EEDPFLAGS_DO_NOT_DISABLE_MODE |
 				MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD);
 		} else {
 			io_request->EEDPFlags = cpu_to_le16(
@@ -1688,6 +2022,166 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len,
 }
 
 /**
+ * megasas_stream_detect -	stream detection on read and and write IOs
+ * @instance:		Adapter soft state
+ * @cmd:		    Command to be prepared
+ * @io_info:		IO Request info
+ *
+ */
+
+/** stream detection on read and and write IOs */
+static void megasas_stream_detect(struct megasas_instance *instance,
+				  struct megasas_cmd_fusion *cmd,
+				  struct IO_REQUEST_INFO *io_info)
+{
+	struct fusion_context *fusion = instance->ctrl_context;
+	u32 device_id = io_info->ldTgtId;
+	struct LD_STREAM_DETECT *current_ld_sd
+		= fusion->stream_detect_by_ld[device_id];
+	u32 *track_stream = &current_ld_sd->mru_bit_map, stream_num;
+	u32 shifted_values, unshifted_values;
+	u32 index_value_mask, shifted_values_mask;
+	int i;
+	bool is_read_ahead = false;
+	struct STREAM_DETECT *current_sd;
+	/* find possible stream */
+	for (i = 0; i < MAX_STREAMS_TRACKED; ++i) {
+		stream_num = (*track_stream >>
+			(i * BITS_PER_INDEX_STREAM)) &
+			STREAM_MASK;
+		current_sd = &current_ld_sd->stream_track[stream_num];
+		/* if we found a stream, update the raid
+		 *  context and also update the mruBitMap
+		 */
+		/*	boundary condition */
+		if ((current_sd->next_seq_lba) &&
+		    (io_info->ldStartBlock >= current_sd->next_seq_lba) &&
+		    (io_info->ldStartBlock <= (current_sd->next_seq_lba + 32)) &&
+		    (current_sd->is_read == io_info->isRead)) {
+
+			if ((io_info->ldStartBlock != current_sd->next_seq_lba)	&&
+			    ((!io_info->isRead) || (!is_read_ahead)))
+				/*
+				 * Once the API availible we need to change this.
+				 * At this point we are not allowing any gap
+				 */
+				continue;
+
+			SET_STREAM_DETECTED(cmd->io_request->RaidContext.raid_context_g35);
+			current_sd->next_seq_lba =
+			io_info->ldStartBlock + io_info->numBlocks;
+			/*
+			 *	update the mruBitMap LRU
+			 */
+			shifted_values_mask =
+				(1 <<  i * BITS_PER_INDEX_STREAM) - 1;
+			shifted_values = ((*track_stream & shifted_values_mask)
+						<< BITS_PER_INDEX_STREAM);
+			index_value_mask =
+				STREAM_MASK << i * BITS_PER_INDEX_STREAM;
+			unshifted_values =
+				*track_stream & ~(shifted_values_mask |
+				index_value_mask);
+			*track_stream =
+				unshifted_values | shifted_values | stream_num;
+			return;
+		}
+	}
+	/*
+	 * if we did not find any stream, create a new one
+	 * from the least recently used
+	 */
+	stream_num = (*track_stream >>
+		((MAX_STREAMS_TRACKED - 1) * BITS_PER_INDEX_STREAM)) &
+		STREAM_MASK;
+	current_sd = &current_ld_sd->stream_track[stream_num];
+	current_sd->is_read = io_info->isRead;
+	current_sd->next_seq_lba = io_info->ldStartBlock + io_info->numBlocks;
+	*track_stream = (((*track_stream & ZERO_LAST_STREAM) << 4) | stream_num);
+	return;
+}
+
+/**
+ * megasas_set_raidflag_cpu_affinity - This function sets the cpu
+ * affinity (cpu of the controller) and raid_flags in the raid context
+ * based on IO type.
+ *
+ * @praid_context:	IO RAID context
+ * @raid:		LD raid map
+ * @fp_possible:	Is fast path possible?
+ * @is_read:		Is read IO?
+ *
+ */
+static void
+megasas_set_raidflag_cpu_affinity(union RAID_CONTEXT_UNION *praid_context,
+				  struct MR_LD_RAID *raid, bool fp_possible,
+				  u8 is_read, u32 scsi_buff_len)
+{
+	u8 cpu_sel = MR_RAID_CTX_CPUSEL_0;
+	struct RAID_CONTEXT_G35 *rctx_g35;
+
+	rctx_g35 = &praid_context->raid_context_g35;
+	if (fp_possible) {
+		if (is_read) {
+			if ((raid->cpuAffinity.pdRead.cpu0) &&
+			    (raid->cpuAffinity.pdRead.cpu1))
+				cpu_sel = MR_RAID_CTX_CPUSEL_FCFS;
+			else if (raid->cpuAffinity.pdRead.cpu1)
+				cpu_sel = MR_RAID_CTX_CPUSEL_1;
+		} else {
+			if ((raid->cpuAffinity.pdWrite.cpu0) &&
+			    (raid->cpuAffinity.pdWrite.cpu1))
+				cpu_sel = MR_RAID_CTX_CPUSEL_FCFS;
+			else if (raid->cpuAffinity.pdWrite.cpu1)
+				cpu_sel = MR_RAID_CTX_CPUSEL_1;
+			/* Fast path cache by pass capable R0/R1 VD */
+			if ((raid->level <= 1) &&
+			    (raid->capability.fp_cache_bypass_capable)) {
+				rctx_g35->routing_flags |=
+					(1 << MR_RAID_CTX_ROUTINGFLAGS_SLD_SHIFT);
+				rctx_g35->raid_flags =
+					(MR_RAID_FLAGS_IO_SUB_TYPE_CACHE_BYPASS
+					<< MR_RAID_CTX_RAID_FLAGS_IO_SUB_TYPE_SHIFT);
+			}
+		}
+	} else {
+		if (is_read) {
+			if ((raid->cpuAffinity.ldRead.cpu0) &&
+			    (raid->cpuAffinity.ldRead.cpu1))
+				cpu_sel = MR_RAID_CTX_CPUSEL_FCFS;
+			else if (raid->cpuAffinity.ldRead.cpu1)
+				cpu_sel = MR_RAID_CTX_CPUSEL_1;
+		} else {
+			if ((raid->cpuAffinity.ldWrite.cpu0) &&
+			    (raid->cpuAffinity.ldWrite.cpu1))
+				cpu_sel = MR_RAID_CTX_CPUSEL_FCFS;
+			else if (raid->cpuAffinity.ldWrite.cpu1)
+				cpu_sel = MR_RAID_CTX_CPUSEL_1;
+
+			if (is_stream_detected(rctx_g35) &&
+			    (raid->level == 5) &&
+			    (raid->writeMode == MR_RL_WRITE_THROUGH_MODE) &&
+			    (cpu_sel == MR_RAID_CTX_CPUSEL_FCFS))
+				cpu_sel = MR_RAID_CTX_CPUSEL_0;
+		}
+	}
+
+	rctx_g35->routing_flags |=
+		(cpu_sel << MR_RAID_CTX_ROUTINGFLAGS_CPUSEL_SHIFT);
+
+	/* Always give priority to MR_RAID_FLAGS_IO_SUB_TYPE_LDIO_BW_LIMIT
+	 * vs MR_RAID_FLAGS_IO_SUB_TYPE_CACHE_BYPASS.
+	 * IO Subtype is not bitmap.
+	 */
+	if ((raid->level == 1) && (!is_read)) {
+		if (scsi_buff_len > MR_LARGE_IO_MIN_SIZE)
+			praid_context->raid_context_g35.raid_flags =
+				(MR_RAID_FLAGS_IO_SUB_TYPE_LDIO_BW_LIMIT
+				<< MR_RAID_CTX_RAID_FLAGS_IO_SUB_TYPE_SHIFT);
+	}
+}
+
+/**
  * megasas_build_ldio_fusion -	Prepares IOs to devices
  * @instance:		Adapter soft state
  * @scp:		SCSI command
@@ -1701,29 +2195,36 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
 			  struct scsi_cmnd *scp,
 			  struct megasas_cmd_fusion *cmd)
 {
-	u8 fp_possible;
+	bool fp_possible;
+	u16 ld;
 	u32 start_lba_lo, start_lba_hi, device_id, datalength = 0;
+	u32 scsi_buff_len;
 	struct MPI2_RAID_SCSI_IO_REQUEST *io_request;
 	union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc;
 	struct IO_REQUEST_INFO io_info;
 	struct fusion_context *fusion;
 	struct MR_DRV_RAID_MAP_ALL *local_map_ptr;
 	u8 *raidLUN;
+	unsigned long spinlock_flags;
+	union RAID_CONTEXT_UNION *praid_context;
+	struct MR_LD_RAID *raid = NULL;
+	struct MR_PRIV_DEVICE *mrdev_priv;
 
 	device_id = MEGASAS_DEV_INDEX(scp);
 
 	fusion = instance->ctrl_context;
 
 	io_request = cmd->io_request;
-	io_request->RaidContext.VirtualDiskTgtId = cpu_to_le16(device_id);
-	io_request->RaidContext.status = 0;
-	io_request->RaidContext.exStatus = 0;
+	io_request->RaidContext.raid_context.virtual_disk_tgt_id =
+		cpu_to_le16(device_id);
+	io_request->RaidContext.raid_context.status = 0;
+	io_request->RaidContext.raid_context.ex_status = 0;
 
 	req_desc = (union MEGASAS_REQUEST_DESCRIPTOR_UNION *)cmd->request_desc;
 
 	start_lba_lo = 0;
 	start_lba_hi = 0;
-	fp_possible = 0;
+	fp_possible = false;
 
 	/*
 	 * 6-byte READ(0x08) or WRITE(0x0A) cdb
@@ -1779,22 +2280,27 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
 	io_info.ldStartBlock = ((u64)start_lba_hi << 32) | start_lba_lo;
 	io_info.numBlocks = datalength;
 	io_info.ldTgtId = device_id;
-	io_request->DataLength = cpu_to_le32(scsi_bufflen(scp));
+	io_info.r1_alt_dev_handle = MR_DEVHANDLE_INVALID;
+	scsi_buff_len = scsi_bufflen(scp);
+	io_request->DataLength = cpu_to_le32(scsi_buff_len);
 
 	if (scp->sc_data_direction == PCI_DMA_FROMDEVICE)
 		io_info.isRead = 1;
 
 	local_map_ptr = fusion->ld_drv_map[(instance->map_id & 1)];
+	ld = MR_TargetIdToLdGet(device_id, local_map_ptr);