octeontx2-pf: Initialize and config queues

This patch does the initialization of all queues ie the receive buffer pools, receive and transmit queues, completion or notification queues etc. Allocates all required resources (eg transmit schedulers, receive buffers etc) and configures them for proper functioning of queues. Also sets up receive queue's RED dropping levels. Co-developed-by: Geetha sowjanya <gakula@marvell.com> Signed-off-by: Geetha sowjanya <gakula@marvell.com> Signed-off-by: Sunil Goutham <sgoutham@marvell.com> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Sunil Goutham <sgoutham@marvell.com> 2020-01-27 18:35:18 +0530
committer: David S. Miller <davem@davemloft.net> 2020-01-27 14:33:39 +0100
commit: caa2da34fd25a37e9fd43343b6966fb9d730a6d5 (patch)
tree: 2af4a4c4c70b996f16ed5d00f1778ad6cddb0e71 /drivers/net/ethernet/marvell
parent: 05fcc9e08955b7c04afb7f53402902f4a86e553c (diff)
7 files changed, 1290 insertions, 17 deletions
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h
index 784207bae5f8..cd33c2e6ca5f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h
@@ -143,8 +143,13 @@ enum nix_scheduler {
 	NIX_TXSCH_LVL_CNT = 0x5,
 };
 
-#define TXSCH_TL1_DFLT_RR_QTM      ((1 << 24) - 1)
-#define TXSCH_TL1_DFLT_RR_PRIO     (0x1ull)
+#define TXSCH_RR_QTM_MAX		((1 << 24) - 1)
+#define TXSCH_TL1_DFLT_RR_QTM		TXSCH_RR_QTM_MAX
+#define TXSCH_TL1_DFLT_RR_PRIO		(0x1ull)
+#define MAX_SCHED_WEIGHT		0xFF
+#define DFLT_RR_WEIGHT			71
+#define DFLT_RR_QTM	((DFLT_RR_WEIGHT * TXSCH_RR_QTM_MAX) \
+			 / MAX_SCHED_WEIGHT)
 
 /* Min/Max packet sizes, excluding FCS */
 #define	NIC_HW_MIN_FRS			40
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 1fa09e9cfeb2..3ebbf0462b8f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -15,6 +15,388 @@
 #include "otx2_common.h"
 #include "otx2_struct.h"
 
+dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+			   gfp_t gfp)
+{
+	dma_addr_t iova;
+
+	/* Check if request can be accommodated in previous allocated page */
+	if (pool->page && ((pool->page_offset + pool->rbsize) <=
+	    (PAGE_SIZE << pool->rbpage_order))) {
+		pool->pageref++;
+		goto ret;
+	}
+
+	otx2_get_page(pool);
+
+	/* Allocate a new page */
+	pool->page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
+				 pool->rbpage_order);
+	if (unlikely(!pool->page))
+		return -ENOMEM;
+
+	pool->page_offset = 0;
+ret:
+	iova = (u64)otx2_dma_map_page(pfvf, pool->page, pool->page_offset,
+				      pool->rbsize, DMA_FROM_DEVICE);
+	if (!iova) {
+		if (!pool->page_offset)
+			__free_pages(pool->page, pool->rbpage_order);
+		pool->page = NULL;
+		return -ENOMEM;
+	}
+	pool->page_offset += pool->rbsize;
+	return iova;
+}
+
+static int otx2_get_link(struct otx2_nic *pfvf)
+{
+	int link = 0;
+	u16 map;
+
+	/* cgx lmac link */
+	if (pfvf->hw.tx_chan_base >= CGX_CHAN_BASE) {
+		map = pfvf->hw.tx_chan_base & 0x7FF;
+		link = 4 * ((map >> 8) & 0xF) + ((map >> 4) & 0xF);
+	}
+	/* LBK channel */
+	if (pfvf->hw.tx_chan_base < SDP_CHAN_BASE)
+		link = 12;
+
+	return link;
+}
+
+int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
+{
+	struct otx2_hw *hw = &pfvf->hw;
+	struct nix_txschq_config *req;
+	u64 schq, parent;
+
+	req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox);
+	if (!req)
+		return -ENOMEM;
+
+	req->lvl = lvl;
+	req->num_regs = 1;
+
+	schq = hw->txschq_list[lvl][0];
+	/* Set topology e.t.c configuration */
+	if (lvl == NIX_TXSCH_LVL_SMQ) {
+		req->reg[0] = NIX_AF_SMQX_CFG(schq);
+		req->regval[0] |= (0x20ULL << 51) | (0x80ULL << 39) |
+				  (0x2ULL << 36);
+		req->num_regs++;
+		/* MDQ config */
+		parent =  hw->txschq_list[NIX_TXSCH_LVL_TL4][0];
+		req->reg[1] = NIX_AF_MDQX_PARENT(schq);
+		req->regval[1] = parent << 16;
+		req->num_regs++;
+		/* Set DWRR quantum */
+		req->reg[2] = NIX_AF_MDQX_SCHEDULE(schq);
+		req->regval[2] =  DFLT_RR_QTM;
+	} else if (lvl == NIX_TXSCH_LVL_TL4) {
+		parent =  hw->txschq_list[NIX_TXSCH_LVL_TL3][0];
+		req->reg[0] = NIX_AF_TL4X_PARENT(schq);
+		req->regval[0] = parent << 16;
+		req->num_regs++;
+		req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq);
+		req->regval[1] = DFLT_RR_QTM;
+	} else if (lvl == NIX_TXSCH_LVL_TL3) {
+		parent = hw->txschq_list[NIX_TXSCH_LVL_TL2][0];
+		req->reg[0] = NIX_AF_TL3X_PARENT(schq);
+		req->regval[0] = parent << 16;
+		req->num_regs++;
+		req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq);
+		req->regval[1] = DFLT_RR_QTM;
+	} else if (lvl == NIX_TXSCH_LVL_TL2) {
+		parent =  hw->txschq_list[NIX_TXSCH_LVL_TL1][0];
+		req->reg[0] = NIX_AF_TL2X_PARENT(schq);
+		req->regval[0] = parent << 16;
+
+		req->num_regs++;
+		req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq);
+		req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | DFLT_RR_QTM;
+
+		req->num_regs++;
+		req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq,
+							otx2_get_link(pfvf));
+		/* Enable this queue and backpressure */
+		req->regval[2] = BIT_ULL(13) | BIT_ULL(12);
+
+	} else if (lvl == NIX_TXSCH_LVL_TL1) {
+		/* Default config for TL1.
+		 * For VF this is always ignored.
+		 */
+
+		/* Set DWRR quantum */
+		req->reg[0] = NIX_AF_TL1X_SCHEDULE(schq);
+		req->regval[0] = TXSCH_TL1_DFLT_RR_QTM;
+
+		req->num_regs++;
+		req->reg[1] = NIX_AF_TL1X_TOPOLOGY(schq);
+		req->regval[1] = (TXSCH_TL1_DFLT_RR_PRIO << 1);
+
+		req->num_regs++;
+		req->reg[2] = NIX_AF_TL1X_CIR(schq);
+		req->regval[2] = 0;
+	}
+
+	return otx2_sync_mbox_msg(&pfvf->mbox);
+}
+
+int otx2_txsch_alloc(struct otx2_nic *pfvf)
+{
+	struct nix_txsch_alloc_req *req;
+	int lvl;
+
+	/* Get memory to put this msg */
+	req = otx2_mbox_alloc_msg_nix_txsch_alloc(&pfvf->mbox);
+	if (!req)
+		return -ENOMEM;
+
+	/* Request one schq per level */
+	for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++)
+		req->schq[lvl] = 1;
+
+	return otx2_sync_mbox_msg(&pfvf->mbox);
+}
+
+int otx2_txschq_stop(struct otx2_nic *pfvf)
+{
+	struct nix_txsch_free_req *free_req;
+	int lvl, schq, err;
+
+	otx2_mbox_lock(&pfvf->mbox);
+	/* Free the transmit schedulers */
+	free_req = otx2_mbox_alloc_msg_nix_txsch_free(&pfvf->mbox);
+	if (!free_req) {
+		otx2_mbox_unlock(&pfvf->mbox);
+		return -ENOMEM;
+	}
+
+	free_req->flags = TXSCHQ_FREE_ALL;
+	err = otx2_sync_mbox_msg(&pfvf->mbox);
+	otx2_mbox_unlock(&pfvf->mbox);
+
+	/* Clear the txschq list */
+	for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+		for (schq = 0; schq < MAX_TXSCHQ_PER_FUNC; schq++)
+			pfvf->hw.txschq_list[lvl][schq] = 0;
+	}
+	return err;
+}
+
+void otx2_sqb_flush(struct otx2_nic *pfvf)
+{
+	int qidx, sqe_tail, sqe_head;
+	u64 incr, *ptr, val;
+
+	ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS);
+	for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) {
+		incr = (u64)qidx << 32;
+		while (1) {
+			val = otx2_atomic64_add(incr, ptr);
+			sqe_head = (val >> 20) & 0x3F;
+			sqe_tail = (val >> 28) & 0x3F;
+			if (sqe_head == sqe_tail)
+				break;
+			usleep_range(1, 3);
+		}
+	}
+}
+
+/* RED and drop levels of CQ on packet reception.
+ * For CQ level is measure of emptiness ( 0x0 = full, 255 = empty).
+ */
+#define RQ_PASS_LVL_CQ(skid, qsize)	((((skid) + 16) * 256) / (qsize))
+#define RQ_DROP_LVL_CQ(skid, qsize)	(((skid) * 256) / (qsize))
+
+/* RED and drop levels of AURA for packet reception.
+ * For AURA level is measure of fullness (0x0 = empty, 255 = full).
+ * Eg: For RQ length 1K, for pass/drop level 204/230.
+ * RED accepts pkts if free pointers > 102 & <= 205.
+ * Drops pkts if free pointers < 102.
+ */
+#define RQ_PASS_LVL_AURA (255 - ((95 * 256) / 100)) /* RED when 95% is full */
+#define RQ_DROP_LVL_AURA (255 - ((99 * 256) / 100)) /* Drop when 99% is full */
+
+/* Send skid of 2000 packets required for CQ size of 4K CQEs. */
+#define SEND_CQ_SKID	2000
+
+static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
+{
+	struct otx2_qset *qset = &pfvf->qset;
+	struct nix_aq_enq_req *aq;
+
+	/* Get memory to put this msg */
+	aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
+	if (!aq)
+		return -ENOMEM;
+
+	aq->rq.cq = qidx;
+	aq->rq.ena = 1;
+	aq->rq.pb_caching = 1;
+	aq->rq.lpb_aura = lpb_aura; /* Use large packet buffer aura */
+	aq->rq.lpb_sizem1 = (DMA_BUFFER_LEN(pfvf->rbsize) / 8) - 1;
+	aq->rq.xqe_imm_size = 0; /* Copying of packet to CQE not needed */
+	aq->rq.flow_tagw = 32; /* Copy full 32bit flow_tag to CQE header */
+	aq->rq.lpb_drop_ena = 1; /* Enable RED dropping for AURA */
+	aq->rq.xqe_drop_ena = 1; /* Enable RED dropping for CQ/SSO */
+	aq->rq.xqe_pass = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
+	aq->rq.xqe_drop = RQ_DROP_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
+	aq->rq.lpb_aura_pass = RQ_PASS_LVL_AURA;
+	aq->rq.lpb_aura_drop = RQ_DROP_LVL_AURA;
+
+	/* Fill AQ info */
+	aq->qidx = qidx;
+	aq->ctype = NIX_AQ_CTYPE_RQ;
+	aq->op = NIX_AQ_INSTOP_INIT;
+
+	return otx2_sync_mbox_msg(&pfvf->mbox);
+}
+
+static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
+{
+	struct otx2_qset *qset = &pfvf->qset;
+	struct otx2_snd_queue *sq;
+	struct nix_aq_enq_req *aq;
+	struct otx2_pool *pool;
+	int err;
+
+	pool = &pfvf->qset.pool[sqb_aura];
+	sq = &qset->sq[qidx];
+	sq->sqe_size = NIX_SQESZ_W16 ? 64 : 128;
+	sq->sqe_cnt = qset->sqe_cnt;
+
+	err = qmem_alloc(pfvf->dev, &sq->sqe, 1, sq->sqe_size);
+	if (err)
+		return err;
+
+	sq->sqe_base = sq->sqe->base;
+
+	sq->sqe_per_sqb = (pfvf->hw.sqb_size / sq->sqe_size) - 1;
+	sq->num_sqbs = (qset->sqe_cnt + sq->sqe_per_sqb) / sq->sqe_per_sqb;
+	sq->aura_id = sqb_aura;
+	sq->aura_fc_addr = pool->fc_addr->base;
+	sq->lmt_addr = (__force u64 *)(pfvf->reg_base + LMT_LF_LMTLINEX(qidx));
+	sq->io_addr = (__force u64)otx2_get_regaddr(pfvf, NIX_LF_OP_SENDX(0));
+
+	/* Get memory to put this msg */
+	aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
+	if (!aq)
+		return -ENOMEM;
+
+	aq->sq.cq = pfvf->hw.rx_queues + qidx;
+	aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */
+	aq->sq.cq_ena = 1;
+	aq->sq.ena = 1;
+	/* Only one SMQ is allocated, map all SQ's to that SMQ  */
+	aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
+	aq->sq.smq_rr_quantum = DFLT_RR_QTM;
+	aq->sq.default_chan = pfvf->hw.tx_chan_base;
+	aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
+	aq->sq.sqb_aura = sqb_aura;
+	/* Due pipelining impact minimum 2000 unused SQ CQE's
+	 * need to maintain to avoid CQ overflow.
+	 */
+	aq->sq.cq_limit = ((SEND_CQ_SKID * 256) / (sq->sqe_cnt));
+
+	/* Fill AQ info */
+	aq->qidx = qidx;
+	aq->ctype = NIX_AQ_CTYPE_SQ;
+	aq->op = NIX_AQ_INSTOP_INIT;
+
+	return otx2_sync_mbox_msg(&pfvf->mbox);
+}
+
+static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
+{
+	struct otx2_qset *qset = &pfvf->qset;
+	struct nix_aq_enq_req *aq;
+	struct otx2_cq_queue *cq;
+	int err, pool_id;
+
+	cq = &qset->cq[qidx];
+	cq->cq_idx = qidx;
+	if (qidx < pfvf->hw.rx_queues) {
+		cq->cq_type = CQ_RX;
+		cq->cqe_cnt = qset->rqe_cnt;
+	} else {
+		cq->cq_type = CQ_TX;
+		cq->cqe_cnt = qset->sqe_cnt;
+	}
+	cq->cqe_size = pfvf->qset.xqe_size;
+
+	/* Allocate memory for CQEs */
+	err = qmem_alloc(pfvf->dev, &cq->cqe, cq->cqe_cnt, cq->cqe_size);
+	if (err)
+		return err;
+
+	/* Save CQE CPU base for faster reference */
+	cq->cqe_base = cq->cqe->base;
+	/* In case where all RQs auras point to single pool,
+	 * all CQs receive buffer pool also point to same pool.
+	 */
+	pool_id = ((cq->cq_type == CQ_RX) &&
+		   (pfvf->hw.rqpool_cnt != pfvf->hw.rx_queues)) ? 0 : qidx;
+	cq->rbpool = &qset->pool[pool_id];
+
+	/* Get memory to put this msg */
+	aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
+	if (!aq)
+		return -ENOMEM;
+
+	aq->cq.ena = 1;
+	aq->cq.qsize = Q_SIZE(cq->cqe_cnt, 4);
+	aq->cq.caching = 1;
+	aq->cq.base = cq->cqe->iova;
+	aq->cq.avg_level = 255;
+
+	if (qidx < pfvf->hw.rx_queues) {
+		aq->cq.drop = RQ_DROP_LVL_CQ(pfvf->hw.rq_skid, cq->cqe_cnt);
+		aq->cq.drop_ena = 1;
+	}
+
+	/* Fill AQ info */
+	aq->qidx = qidx;
+	aq->ctype = NIX_AQ_CTYPE_CQ;
+	aq->op = NIX_AQ_INSTOP_INIT;
+
+	return otx2_sync_mbox_msg(&pfvf->mbox);
+}
+
+int otx2_config_nix_queues(struct otx2_nic *pfvf)
+{
+	int qidx, err;
+
+	/* Initialize RX queues */
+	for (qidx = 0; qidx < pfvf->hw.rx_queues; qidx++) {
+		u16 lpb_aura = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx);
+
+		err = otx2_rq_init(pfvf, qidx, lpb_aura);
+		if (err)
+			return err;
+	}
+
+	/* Initialize TX queues */
+	for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) {
+		u16 sqb_aura = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx);
+
+		err = otx2_sq_init(pfvf, qidx, sqb_aura);
+		if (err)
+			return err;
+	}
+
+	/* Initialize completion queues */
+	for (qidx = 0; qidx < pfvf->qset.cq_cnt; qidx++) {
+		err = otx2_cq_init(pfvf, qidx);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 int otx2_config_nix(struct otx2_nic *pfvf)
 {
 	struct nix_lf_alloc_req  *nixlf;
@@ -58,6 +440,302 @@ int otx2_config_nix(struct otx2_nic *pfvf)
 	return rsp->hdr.rc;
 }
 
+void otx2_sq_free_sqbs(struct otx2_nic *pfvf)
+{
+	struct otx2_qset *qset = &pfvf->qset;
+	struct otx2_hw *hw = &pfvf->hw;
+	struct otx2_snd_queue *sq;
+	int sqb, qidx;
+	u64 iova, pa;
+
+	for (qidx = 0; qidx < hw->tx_queues; qidx++) {
+		sq = &qset->sq[qidx];
+		if (!sq->sqb_ptrs)
+			continue;
+		for (sqb = 0; sqb < sq->sqb_count; sqb++) {
+			if (!sq->sqb_ptrs[sqb])
+				continue;
+			iova = sq->sqb_ptrs[sqb];
+			pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
+			dma_unmap_page_attrs(pfvf->dev, iova, hw->sqb_size,
+					     DMA_FROM_DEVICE,
+					     DMA_ATTR_SKIP_CPU_SYNC);
+			put_page(virt_to_page(phys_to_virt(pa)));
+		}
+		sq->sqb_count = 0;
+	}
+}
+
+void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
+{
+	int pool_id, pool_start = 0, pool_end = 0, size = 0;
+	u64 iova, pa;
+
+	if (type == AURA_NIX_SQ) {
+		pool_start = otx2_get_pool_idx(pfvf, type, 0);
+		pool_end =  pool_start + pfvf->hw.sqpool_cnt;
+		size = pfvf->hw.sqb_size;
+	}
+	if (type == AURA_NIX_RQ) {
+		pool_start = otx2_get_pool_idx(pfvf, type, 0);
+		pool_end = pfvf->hw.rqpool_cnt;
+		size = pfvf->rbsize;
+	}
+
+	/* Free SQB and RQB pointers from the aura pool */
+	for (pool_id = pool_start; pool_id < pool_end; pool_id++) {
+		iova = otx2_aura_allocptr(pfvf, pool_id);
+		while (iova) {
+			if (type == AURA_NIX_RQ)
+				iova -= OTX2_HEAD_ROOM;
+
+			pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
+			dma_unmap_page_attrs(pfvf->dev, iova, size,
+					     DMA_FROM_DEVICE,
+					     DMA_ATTR_SKIP_CPU_SYNC);
+			put_page(virt_to_page(phys_to_virt(pa)));
+			iova = otx2_aura_allocptr(pfvf, pool_id);
+		}
+	}
+}
+
+void otx2_aura_pool_free(struct otx2_nic *pfvf)
+{
+	struct otx2_pool *pool;
+	int pool_id;
+
+	if (!pfvf->qset.pool)
+		return;
+
+	for (pool_id = 0; pool_id < pfvf->hw.pool_cnt; pool_id++) {
+		pool = &pfvf->qset.pool[pool_id];
+		qmem_free(pfvf->dev, pool->stack);
+		qmem_free(pfvf->dev, pool->fc_addr);
+	}
+	devm_kfree(pfvf->dev, pfvf->qset.pool);
+}
+
+static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
+			  int pool_id, int numptrs)
+{
+	struct npa_aq_enq_req *aq;
+	struct otx2_pool *pool;
+	int err;
+
+	pool = &pfvf->qset.pool[pool_id];
+
+	/* Allocate memory for HW to update Aura count.
+	 * Alloc one cache line, so that it fits all FC_STYPE modes.
+	 */
+	if (!pool->fc_addr) {
+		err = qmem_alloc(pfvf->dev, &pool->fc_addr, 1, OTX2_ALIGN);
+		if (err)
+			return err;
+	}
+
+	/* Initialize this aura's context via AF */
+	aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
+	if (!aq) {
+		/* Shared mbox memory buffer is full, flush it and retry */
+		err = otx2_sync_mbox_msg(&pfvf->mbox);
+		if (err)
+			return err;
+		aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
+		if (!aq)
+			return -ENOMEM;
+	}
+
+	aq->aura_id = aura_id;
+	/* Will be filled by AF with correct pool context address */
+	aq->aura.pool_addr = pool_id;
+	aq->aura.pool_caching = 1;
+	aq->aura.shift = ilog2(numptrs) - 8;
+	aq->aura.count = numptrs;
+	aq->aura.limit = numptrs;
+	aq->aura.avg_level = 255;
+	aq->aura.ena = 1;
+	aq->aura.fc_ena = 1;
+	aq->aura.fc_addr = pool->fc_addr->iova;
+	aq->aura.fc_hyst_bits = 0; /* Store count on all updates */
+
+	/* Fill AQ info */
+	aq->ctype = NPA_AQ_CTYPE_AURA;
+	aq->op = NPA_AQ_INSTOP_INIT;
+
+	return 0;
+}
+
+static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
+			  int stack_pages, int numptrs, int buf_size)
+{
+	struct npa_aq_enq_req *aq;
+	struct otx2_pool *pool;
+	int err;
+
+	pool = &pfvf->qset.pool[pool_id];
+	/* Alloc memory for stack which is used to store buffer pointers */
+	err = qmem_alloc(pfvf->dev, &pool->stack,
+			 stack_pages, pfvf->hw.stack_pg_bytes);
+	if (err)
+		return err;
+
+	pool->rbsize = buf_size;
+	pool->rbpage_order = get_order(buf_size);
+
+	/* Initialize this pool's context via AF */
+	aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
+	if (!aq) {
+		/* Shared mbox memory buffer is full, flush it and retry */
+		err = otx2_sync_mbox_msg(&pfvf->mbox);
+		if (err) {
+			qmem_free(pfvf->dev, pool->stack);
+			return err;
+		}
+		aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
+		if (!aq) {
+			qmem_free(pfvf->dev, pool->stack);
+			return -ENOMEM;
+		}
+	}
+
+	aq->aura_id = pool_id;
+	aq->pool.stack_base = pool->stack->iova;
+	aq->pool.stack_caching = 1;
+	aq->pool.ena = 1;
+	aq->pool.buf_size = buf_size / 128;
+	aq->pool.stack_max_pages = stack_pages;
+	aq->pool.shift = ilog2(numptrs) - 8;
+	aq->pool.ptr_start = 0;
+	aq->pool.ptr_end = ~0ULL;
+
+	/* Fill AQ info */
+	aq->ctype = NPA_AQ_CTYPE_POOL;
+	aq->op = NPA_AQ_INSTOP_INIT;
+
+	return 0;
+}
+
+int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
+{
+	int qidx, pool_id, stack_pages, num_sqbs;
+	struct otx2_qset *qset = &pfvf->qset;
+	struct otx2_hw *hw = &pfvf->hw;
+	struct otx2_snd_queue *sq;
+	struct otx2_pool *pool;
+	int err, ptr;
+	s64 bufptr;
+
+	/* Calculate number of SQBs needed.
+	 *
+	 * For a 128byte SQE, and 4K size SQB, 31 SQEs will fit in one SQB.
+	 * Last SQE is used for pointing to next SQB.
+	 */
+	num_sqbs = (hw->sqb_size / 128) - 1;
+	num_sqbs = (qset->sqe_cnt + num_sqbs) / num_sqbs;
+
+	/* Get no of stack pages needed */
+	stack_pages =
+		(num_sqbs + hw->stack_pg_ptrs - 1) / hw->stack_pg_ptrs;
+
+	for (qidx = 0; qidx < hw->tx_queues; qidx++) {
+		pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx);
+		/* Initialize aura context */
+		err = otx2_aura_init(pfvf, pool_id, pool_id, num_sqbs);
+		if (err)
+			goto fail;
+
+		/* Initialize pool context */
+		err = otx2_pool_init(pfvf, pool_id, stack_pages,
+				     num_sqbs, hw->sqb_size);
+		if (err)
+			goto fail;
+	}
+
+	/* Flush accumulated messages */
+	err = otx2_sync_mbox_msg(&pfvf->mbox);
+	if (err)
+		goto fail;
+
+	/* Allocate pointers and free them to aura/pool */
+	for (qidx = 0; qidx < hw->tx_queues; qidx++) {
+		pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx);
+		pool = &pfvf->qset.pool[pool_id];
+
+		sq = &qset->sq[qidx];
+		sq->sqb_count = 0;
+		sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(u64 *), GFP_KERNEL);
+		if (!sq->sqb_ptrs)
+			return -ENOMEM;
+
+		for (ptr = 0; ptr < num_sqbs; ptr++) {
+			bufptr = otx2_alloc_rbuf(pfvf, pool, GFP_KERNEL);
+			if (bufptr <= 0)
+				return bufptr;
+			otx2_aura_freeptr(pfvf, pool_id, bufptr);
+			sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
+		}
+		otx2_get_page(pool);
+	}
+
+	return 0;
+fail:
+	otx2_mbox_reset(&pfvf->mbox.mbox, 0);
+	otx2_aura_pool_free(pfvf);
+	return err;
+}
+
+int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
+{
+	struct otx2_hw *hw = &pfvf->hw;
+	int stack_pages, pool_id, rq;
+	struct otx2_pool *pool;
+	int err, ptr, num_ptrs;
+	s64 bufptr;
+
+	num_ptrs = pfvf->qset.rqe_cnt;
+
+	stack_pages =
+		(num_ptrs + hw->stack_pg_ptrs - 1) / hw->stack_pg_ptrs;
+
+	for (rq = 0; rq < hw->rx_queues; rq++) {
+		pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, rq);
+		/* Initialize aura context */
+		err = otx2_aura_init(pfvf, pool_id, pool_id, num_ptrs);
+		if (err)
+			goto fail;
+	}
+	for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
+		err = otx2_pool_init(pfvf, pool_id, stack_pages,
+				     num_ptrs, pfvf->rbsize);
+		if (err)
+			goto fail;
+	}
+
+	/* Flush accumulated messages */
+	err = otx2_sync_mbox_msg(&pfvf->mbox);
+	if (err)
+		goto fail;
+
+	/* Allocate pointers and free them to aura/pool */
+	for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
+		pool = &pfvf->qset.pool[pool_id];
+		for (ptr = 0; ptr < num_ptrs; ptr++) {
+			bufptr = otx2_alloc_rbuf(pfvf, pool, GFP_KERNEL);
+			if (bufptr <= 0)
+				return bufptr;
+			otx2_aura_freeptr(pfvf, pool_id,
+					  bufptr + OTX2_HEAD_ROOM);
+		}
+		otx2_get_page(pool);
+	}
+
+	return 0;
+fail:
+	otx2_mbox_reset(&pfvf->mbox.mbox, 0);
+	otx2_aura_pool_free(pfvf);
+	return err;
+}
+
 int otx2_config_npa(struct otx2_nic *pfvf)
 {
 	struct otx2_qset *qset = &pfvf->qset;
@@ -134,6 +812,14 @@ int otx2_attach_npa_nix(struct otx2_nic *pfvf)
 		return err;
 	}
 
+	pfvf->nix_blkaddr = BLKADDR_NIX0;
+
+	/* If the platform has two NIX blocks then LF may be
+	 * allocated from NIX1.
+	 */
+	if (otx2_read64(pfvf, RVU_PF_BLOCK_ADDRX_DISC(BLKADDR_NIX1)) & 0x1FFULL)
+		pfvf->nix_blkaddr = BLKADDR_NIX1;
+
 	/* Get NPA and NIX MSIX vector offsets */
 	msix = otx2_mbox_alloc_msg_msix_offset(&pfvf->mbox);
 	if (!msix) {
@@ -158,6 +844,43 @@ int otx2_attach_npa_nix(struct otx2_nic *pfvf)
 	return 0;
 }
 
+void otx2_ctx_disable(struct mbox *mbox, int type, bool npa)
+{
+	struct hwctx_disable_req *req;
+
+	otx2_mbox_lock(mbox);
+	/* Request AQ to disable this context */
+	if (npa)
+		req = otx2_mbox_alloc_msg_npa_hwctx_disable(mbox);
+	else
+		req = otx2_mbox_alloc_msg_nix_hwctx_disable(mbox);
+
+	if (!req) {
+		otx2_mbox_unlock(mbox);
+		return;
+	}
+
+	req->ctype = type;
+
+	if (otx2_sync_mbox_msg(mbox))
+		dev_err(mbox->pfvf->dev, "%s failed to disable context\n",
+			__func__);
+
+	otx2_mbox_unlock(mbox);
+}
+
+void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf,
+				  struct nix_txsch_alloc_rsp *rsp)
+{
+	int lvl, schq;
+
+	/* Setup transmit scheduler list */
+	for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++)
+		for (schq = 0; schq < rsp->schq[lvl]; schq++)
+			pf->hw.txschq_list[lvl][schq] =
+				rsp->schq_list[lvl][schq];
+}
+
 /* Mbox message handlers */
 void mbox_handler_npa_lf_alloc(struct otx2_nic *pfvf,
 			       struct npa_lf_alloc_rsp *rsp)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index cdb1c567d5dc..a4f1c604cb60 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -12,9 +12,11 @@
 #define OTX2_COMMON_H
 
 #include <linux/pci.h>
+#include <linux/iommu.h>
 
 #include <mbox.h>
 #include "otx2_reg.h"
+#include "otx2_txrx.h"
 
 /* PCI device IDs */
 #define PCI_DEVID_OCTEONTX2_RVU_PF              0xA063
@@ -25,15 +27,9 @@
 
 #define NAME_SIZE                               32
 
-struct otx2_pool {
-	struct qmem		*stack;
-};
-
-struct otx2_qset {
-#define OTX2_MAX_CQ_CNT		64
-	u16			cq_cnt;
-	u16			xqe_size; /* Size of CQE i.e 128 or 512 bytes */
-	struct otx2_pool	*pool;
+enum arua_mapped_qtypes {
+	AURA_NIX_RQ,
+	AURA_NIX_SQ,
 };
 
 struct mbox {
@@ -54,14 +50,21 @@ struct otx2_hw {
 	u16                     tx_queues;
 	u16			max_queues;
 	u16			pool_cnt;
+	u16			rqpool_cnt;
+	u16			sqpool_cnt;
 
 	/* NPA */
 	u32			stack_pg_ptrs;  /* No of ptrs per stack page */
 	u32			stack_pg_bytes; /* Size of stack page */
 	u16			sqb_size;
 
+	/* NIX */
+	u16		txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
+
+	/* HW settings, coalescing etc */
 	u16			rx_chan_base;
 	u16			tx_chan_base;
+	u16			rq_skid;
 
 	/* MSI-X */
 	u16			npa_msixoff; /* Offset of NPA vectors */
@@ -73,6 +76,8 @@ struct otx2_hw {
 struct otx2_nic {
 	void __iomem		*reg_base;
 	struct net_device	*netdev;
+	void			*iommu_domain;
+	u16			rbsize; /* Receive buffer size */
 
 	struct otx2_qset	qset;
 	struct otx2_hw		hw;
@@ -84,6 +89,9 @@ struct otx2_nic {
 	struct workqueue_struct *mbox_wq;
 
 	u16			pcifunc; /* RVU PF_FUNC */
+
+	/* Block address of NIX either BLKADDR_NIX0 or BLKADDR_NIX1 */
+	int			nix_blkaddr;
 };
 
 /* Register read/write APIs */
@@ -93,7 +101,7 @@ static inline void __iomem *otx2_get_regaddr(struct otx2_nic *nic, u64 offset)
 
 	switch ((offset >> RVU_FUNC_BLKADDR_SHIFT) & RVU_FUNC_BLKADDR_MASK) {
 	case BLKTYPE_NIX:
-		blkaddr = BLKADDR_NIX0;
+		blkaddr = nic->nix_blkaddr;
 		break;
 	case BLKTYPE_NPA:
 		blkaddr = BLKADDR_NPA;
@@ -184,6 +192,72 @@ static inline void otx2_mbox_unlock(struct mbox *mbox)
 	mutex_unlock(&mbox->lock);
 }
 
+/* With the absence of API for 128-bit IO memory access for arm64,
+ * implement required operations at place.
+ */
+#if defined(CONFIG_ARM64)
+static inline void otx2_write128(u64 lo, u64 hi, void __iomem *addr)
+{
+	__asm__ volatile("stp %x[x0], %x[x1], [%x[p1],#0]!"
+			 ::[x0]"r"(lo), [x1]"r"(hi), [p1]"r"(addr));
+}
+
+static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr)
+{
+	u64 result;
+
+	__asm__ volatile(".cpu   generic+lse\n"
+			 "ldadd %x[i], %x[r], [%[b]]"
+			 : [r]"=r"(result), "+m"(*ptr)
+			 : [i]"r"(incr), [b]"r"(ptr)
+			 : "memory");
+	return result;
+}
+
+#else
+#define otx2_write128(lo, hi, addr)
+#define otx2_atomic64_add(incr, ptr)		({ *ptr += incr; })
+#endif
+
+/* Alloc pointer from pool/aura */
+static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura)
+{
+	u64 *ptr = (u64 *)otx2_get_regaddr(pfvf,
+			   NPA_LF_AURA_OP_ALLOCX(0));
+	u64 incr = (u64)aura | BIT_ULL(63);
+
+	return otx2_atomic64_add(incr, ptr);
+}
+
+/* Free pointer to a pool/aura */
+static inline void otx2_aura_freeptr(struct otx2_nic *pfvf,
+				     int aura, s64 buf)
+{
+	otx2_write128((u64)buf, (u64)aura | BIT_ULL(63),
+		      otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0));
+}
+
+/* Update page ref count */
+static inline void otx2_get_page(struct otx2_pool *pool)
+{
+	if (!pool->page)
+		return;
+
+	if (pool->pageref)
+		page_ref_add(pool->page, pool->pageref);
+	pool->pageref = 0;
+	pool->page = NULL;
+}
+
+static inline int otx2_get_pool_idx(struct otx2_nic *pfvf, int type, int idx)
+{
+	if (type == AURA_NIX_SQ)
+		return pfvf->hw.rqpool_cnt + idx;
+
+	 /* AURA_NIX_RQ */
+	return idx;
+}
+
 /* Mbox APIs */
 static inline int otx2_sync_mbox_msg(struct mbox *mbox)
 {
@@ -263,11 +337,46 @@ MBOX_UP_CGX_MESSAGES
 #define	RVU_PFVF_FUNC_SHIFT	0
 #define	RVU_PFVF_FUNC_MASK	0x3FF
 
+static inline dma_addr_t otx2_dma_map_page(struct otx2_nic *pfvf,
+					   struct page *page,
+					   size_t offset, size_t size,
+					   enum dma_data_direction dir)
+{
+	dma_addr_t iova;
+
+	iova = dma_map_page_attrs(pfvf->dev, page,
+				  offset, size, dir, DMA_ATTR_SKIP_CPU_SYNC);
+	if (unlikely(dma_mapping_error(pfvf->dev, iova)))
+		return (dma_addr_t)NULL;
+	return iova;
+}
+
+static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf,
+				       dma_addr_t addr, size_t size,
+				       enum dma_data_direction dir)
+{
+	dma_unmap_page_attrs(pfvf->dev, addr, size,
+			     dir, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
 /* RVU block related APIs */
 int otx2_attach_npa_nix(struct otx2_nic *pfvf);
 int otx2_detach_resources(struct mbox *mbox);
 int otx2_config_npa(struct otx2_nic *pfvf);
+int otx2_sq_aura_pool_init(struct otx2_nic *pfvf);
+int otx2_rq_aura_pool_init(struct otx2_nic *pfvf);
+void otx2_aura_pool_free(struct otx2_nic *pfvf);
+void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type);
+void otx2_sq_free_sqbs(struct otx2_nic *pfvf);
 int otx2_config_nix(struct otx2_nic *pfvf);
+int otx2_config_nix_queues(struct otx2_nic *pfvf);
+int otx2_txschq_config(struct otx2_nic *pfvf, int lvl);
+int otx2_txsch_alloc(struct otx2_nic *pfvf);
+int otx2_txschq_stop(struct otx2_nic *pfvf);
+void otx2_sqb_flush(struct otx2_nic *pfvf);
+dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+			   gfp_t gfp);
+void otx2_ctx_disable(struct mbox *mbox, int type, bool npa);
 
 /* Mbox handlers */
 void mbox_handler_msix_offset(struct otx2_nic *pfvf,
@@ -276,4 +385,6 @@ void mbox_handler_npa_lf_alloc(struct otx2_nic *pfvf,
 			       struct npa_lf_alloc_rsp *rsp);
 void mbox_handler_nix_lf_alloc(struct otx2_nic *pfvf,
 			       struct nix_lf_alloc_rsp *rsp);
+void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf,
+				  struct nix_txsch_alloc_rsp *rsp);
 #endif /* OTX2_COMMON_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index ef5dba442abf..735188900de4 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -17,7 +17,10 @@
 #include <linux/iommu.h>
 #include <net/ip.h>
 
+#include "otx2_reg.h"
 #include "otx2_common.h"
+#include "otx2_txrx.h"
+#include "otx2_struct.h"
 
 #define DRV_NAME	"octeontx2-nicpf"
 #define DRV_STRING	"Marvell OcteonTX2 NIC Physical Function Driver"
@@ -123,6 +126,10 @@ static void otx2_process_pfaf_mbox_msg(struct otx2_nic *pf,
 	case MBOX_MSG_NIX_LF_ALLOC:
 		mbox_handler_nix_lf_alloc(pf, (struct nix_lf_alloc_rsp *)msg);
 		break;
+	case MBOX_MSG_NIX_TXSCH_ALLOC:
+		mbox_handler_nix_txsch_alloc(pf,
+					     (struct nix_txsch_alloc_rsp *)msg);
+		break;
 	default:
 		if (msg->rc)
 			dev_err(pf->dev,
@@ -379,26 +386,231 @@ static int otx2_set_real_num_queues(struct net_device *netdev,
 	return err;
 }
 
+static void otx2_free_cq_res(struct otx2_nic *pf)
+{
+	struct otx2_qset *qset = &pf->qset;
+	struct otx2_cq_queue *cq;
+	int qidx;
+
+	/* Disable CQs */
+	otx2_ctx_disable(&pf->mbox, NIX_AQ_CTYPE_CQ, false);
+	for (qidx = 0; qidx < qset->cq_cnt; qidx++) {
+		cq = &qset->cq[qidx];
+		qmem_free(pf->dev, cq->cqe);
+	}
+}
+
+static void otx2_free_sq_res(struct otx2_nic *pf)
+{
+	struct otx2_qset *qset = &pf->qset;
+	struct otx2_snd_queue *sq;
+	int qidx;
+
+	/* Disable SQs */
+	otx2_ctx_disable(&pf->mbox, NIX_AQ_CTYPE_SQ, false);
+	/* Free SQB pointers */
+	otx2_sq_free_sqbs(pf);
+	for (qidx = 0; qidx < pf->hw.tx_queues; qidx++) {
+		sq = &qset->sq[qidx];
+		qmem_free(pf->dev, sq->sqe);
+		kfree(sq->sqb_ptrs);
+	}
+}
+
+static int otx2_init_hw_resources(struct otx2_nic *pf)
+{
+	struct mbox *mbox = &pf->mbox;
+	struct otx2_hw *hw = &pf->hw;
+	struct msg_req *req;
+	int err = 0, lvl;
+
+	/* Set required NPA LF's pool counts
+	 * Auras and Pools are used in a 1:1 mapping,
+	 * so, aura count = pool count.
+	 */
+	hw->rqpool_cnt = hw->rx_queues;
+	hw->sqpool_cnt = hw->tx_queues;
+	hw->pool_cnt = hw->rqpool_cnt + hw->sqpool_cnt;
+
+	/* Get the size of receive buffers to allocate */
author	Sunil Goutham <sgoutham@marvell.com>	2020-01-27 18:35:18 +0530
committer	David S. Miller <davem@davemloft.net>	2020-01-27 14:33:39 +0100
commit	caa2da34fd25a37e9fd43343b6966fb9d730a6d5 (patch)
tree	2af4a4c4c70b996f16ed5d00f1778ad6cddb0e71 /drivers/net/ethernet/marvell
parent	05fcc9e08955b7c04afb7f53402902f4a86e553c (diff)