diff options
author | Yuval Mintz <Yuval.Mintz@qlogic.com> | 2016-06-03 14:35:35 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-06-03 20:08:40 -0400 |
commit | dbb799c39717e7b71a386b04b015ddca2dcb1ecd (patch) | |
tree | 40e6ff8695791c88815a9457754a52cf093d5431 /drivers/net/ethernet/qlogic/qed | |
parent | c5ac93191d7e6977c5c3465ac94c73ebb8a8ecba (diff) |
qed: Initialize hardware for new protocols
RoCE and iSCSI would require some added/changed hw configuration in order
to properly run; The biggest single change being the requirement of
allocating and mapping host memory for several HW blocks that aren't being
used by qede [SRC, QM, TM, etc.].
In addition, whereas qede is only using context memory for HW blocks, the
new protocol would also require task memories to be added.
Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/qlogic/qed')
-rw-r--r-- | drivers/net/ethernet/qlogic/qed/qed.h | 9 | ||||
-rw-r--r-- | drivers/net/ethernet/qlogic/qed/qed_cxt.c | 1347 | ||||
-rw-r--r-- | drivers/net/ethernet/qlogic/qed/qed_cxt.h | 24 | ||||
-rw-r--r-- | drivers/net/ethernet/qlogic/qed/qed_dev.c | 154 | ||||
-rw-r--r-- | drivers/net/ethernet/qlogic/qed/qed_hw.c | 26 | ||||
-rw-r--r-- | drivers/net/ethernet/qlogic/qed/qed_hw.h | 12 | ||||
-rw-r--r-- | drivers/net/ethernet/qlogic/qed/qed_reg_addr.h | 33 |
7 files changed, 1535 insertions, 70 deletions
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 4417f127f766..9a63df1184f1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -187,6 +187,8 @@ struct qed_hw_info { #define RESC_START(_p_hwfn, resc) ((_p_hwfn)->hw_info.resc_start[resc]) #define RESC_NUM(_p_hwfn, resc) ((_p_hwfn)->hw_info.resc_num[resc]) +#define RESC_END(_p_hwfn, resc) (RESC_START(_p_hwfn, resc) + \ + RESC_NUM(_p_hwfn, resc)) #define FEAT_NUM(_p_hwfn, resc) ((_p_hwfn)->hw_info.feat_num[resc]) u8 num_tc; @@ -259,6 +261,7 @@ struct qed_qm_info { u8 pure_lb_pq; u8 offload_pq; u8 pure_ack_pq; + u8 ooo_pq; u8 vf_queues_offset; u16 num_pqs; u16 num_vf_pqs; @@ -271,6 +274,7 @@ struct qed_qm_info { u8 pf_wfq; u32 pf_rl; struct qed_wfq_data *wfq_data; + u8 num_pf_rls; }; struct storm_stats { @@ -316,6 +320,7 @@ struct qed_hwfn { bool hw_init_done; u8 num_funcs_on_engine; + u8 enabled_func_idx; /* BAR access */ void __iomem *regview; @@ -354,6 +359,9 @@ struct qed_hwfn { /* Protocol related */ struct qed_pf_params pf_params; + bool b_rdma_enabled_in_prs; + u32 rdma_prs_search_reg; + /* Array of sb_info of all status blocks */ struct qed_sb_info *sbs_info[MAX_SB_PER_PF_MIMD]; u16 num_sbs; @@ -559,6 +567,7 @@ static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev, } #define PURE_LB_TC 8 +#define OOO_LB_TC 9 int qed_configure_vport_wfq(struct qed_dev *cdev, u16 vp_id, u32 rate); void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, u32 min_pf_rate); diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index ac284c58d8c2..d85b7ba8cb9a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -39,6 +39,14 @@ #define DQ_RANGE_SHIFT 4 #define DQ_RANGE_ALIGN BIT(DQ_RANGE_SHIFT) +/* Searcher constants */ +#define SRC_MIN_NUM_ELEMS 256 + +/* Timers constants */ +#define TM_SHIFT 7 +#define TM_ALIGN BIT(TM_SHIFT) +#define TM_ELEM_SIZE 4 + /* ILT constants */ #define ILT_DEFAULT_HW_P_SIZE 3 #define ILT_PAGE_IN_BYTES(hw_p_size) (1U << ((hw_p_size) + 12)) @@ -56,26 +64,71 @@ union conn_context { struct core_conn_context core_ctx; struct eth_conn_context eth_ctx; + struct iscsi_conn_context iscsi_ctx; + struct roce_conn_context roce_ctx; +}; + +/* TYPE-0 task context - iSCSI */ +union type0_task_context { + struct iscsi_task_context iscsi_ctx; }; +/* TYPE-1 task context - ROCE */ +union type1_task_context { + struct rdma_task_context roce_ctx; +}; + +struct src_ent { + u8 opaque[56]; + u64 next; +}; + +#define CDUT_SEG_ALIGNMET 3 /* in 4k chunks */ +#define CDUT_SEG_ALIGNMET_IN_BYTES (1 << (CDUT_SEG_ALIGNMET + 12)) + #define CONN_CXT_SIZE(p_hwfn) \ ALIGNED_TYPE_SIZE(union conn_context, p_hwfn) +#define SRQ_CXT_SIZE (sizeof(struct rdma_srq_context)) + +#define TYPE0_TASK_CXT_SIZE(p_hwfn) \ + ALIGNED_TYPE_SIZE(union type0_task_context, p_hwfn) + +/* Alignment is inherent to the type1_task_context structure */ +#define TYPE1_TASK_CXT_SIZE(p_hwfn) sizeof(union type1_task_context) + /* PF per protocl configuration object */ +#define TASK_SEGMENTS (NUM_TASK_PF_SEGMENTS + NUM_TASK_VF_SEGMENTS) +#define TASK_SEGMENT_VF (NUM_TASK_PF_SEGMENTS) + +struct qed_tid_seg { + u32 count; + u8 type; + bool has_fl_mem; +}; + struct qed_conn_type_cfg { u32 cid_count; u32 cid_start; u32 cids_per_vf; + struct qed_tid_seg tid_seg[TASK_SEGMENTS]; }; /* ILT Client configuration, Per connection type (protocol) resources. */ #define ILT_CLI_PF_BLOCKS (1 + NUM_TASK_PF_SEGMENTS * 2) #define ILT_CLI_VF_BLOCKS (1 + NUM_TASK_VF_SEGMENTS * 2) #define CDUC_BLK (0) +#define SRQ_BLK (0) +#define CDUT_SEG_BLK(n) (1 + (u8)(n)) +#define CDUT_FL_SEG_BLK(n, X) (1 + (n) + NUM_TASK_ ## X ## _SEGMENTS) enum ilt_clients { ILT_CLI_CDUC, + ILT_CLI_CDUT, ILT_CLI_QM, + ILT_CLI_TM, + ILT_CLI_SRC, + ILT_CLI_TSDM, ILT_CLI_MAX }; @@ -88,6 +141,7 @@ struct qed_ilt_cli_blk { u32 total_size; /* 0 means not active */ u32 real_size_in_page; u32 start_line; + u32 dynamic_line_cnt; }; struct qed_ilt_client_cfg { @@ -131,18 +185,44 @@ struct qed_cxt_mngr { /* computed ILT structure */ struct qed_ilt_client_cfg clients[ILT_CLI_MAX]; + /* Task type sizes */ + u32 task_type_size[NUM_TASK_TYPES]; + /* total number of VFs for this hwfn - * ALL VFs are symmetric in terms of HW resources */ u32 vf_count; + /* total number of SRQ's for this hwfn */ + u32 srq_count; + /* Acquired CIDs */ struct qed_cid_acquired_map acquired[MAX_CONN_TYPES]; /* ILT shadow table */ struct qed_dma_mem *ilt_shadow; u32 pf_start_line; + + /* Mutex for a dynamic ILT allocation */ + struct mutex mutex; + + /* SRC T2 */ + struct qed_dma_mem *t2; + u32 t2_num_pages; + u64 first_free; + u64 last_free; }; +static bool src_proto(enum protocol_type type) +{ + return type == PROTOCOLID_ISCSI || + type == PROTOCOLID_ROCE; +} + +static bool tm_cid_proto(enum protocol_type type) +{ + return type == PROTOCOLID_ISCSI || + type == PROTOCOLID_ROCE; +} /* counts the iids for the CDU/CDUC ILT client configuration */ struct qed_cdu_iids { @@ -161,21 +241,120 @@ static void qed_cxt_cdu_iids(struct qed_cxt_mngr *p_mngr, } } +/* counts the iids for the Searcher block configuration */ +struct qed_src_iids { + u32 pf_cids; + u32 per_vf_cids; +}; + +static void qed_cxt_src_iids(struct qed_cxt_mngr *p_mngr, + struct qed_src_iids *iids) +{ + u32 i; + + for (i = 0; i < MAX_CONN_TYPES; i++) { + if (!src_proto(i)) + continue; + + iids->pf_cids += p_mngr->conn_cfg[i].cid_count; + iids->per_vf_cids += p_mngr->conn_cfg[i].cids_per_vf; + } +} + +/* counts the iids for the Timers block configuration */ +struct qed_tm_iids { + u32 pf_cids; + u32 pf_tids[NUM_TASK_PF_SEGMENTS]; /* per segment */ + u32 pf_tids_total; + u32 per_vf_cids; + u32 per_vf_tids; +}; + +static void qed_cxt_tm_iids(struct qed_cxt_mngr *p_mngr, + struct qed_tm_iids *iids) +{ + u32 i, j; + + for (i = 0; i < MAX_CONN_TYPES; i++) { + struct qed_conn_type_cfg *p_cfg = &p_mngr->conn_cfg[i]; + + if (tm_cid_proto(i)) { + iids->pf_cids += p_cfg->cid_count; + iids->per_vf_cids += p_cfg->cids_per_vf; + } + } + + iids->pf_cids = roundup(iids->pf_cids, TM_ALIGN); + iids->per_vf_cids = roundup(iids->per_vf_cids, TM_ALIGN); + iids->per_vf_tids = roundup(iids->per_vf_tids, TM_ALIGN); + + for (iids->pf_tids_total = 0, j = 0; j < NUM_TASK_PF_SEGMENTS; j++) { + iids->pf_tids[j] = roundup(iids->pf_tids[j], TM_ALIGN); + iids->pf_tids_total += iids->pf_tids[j]; + } +} + static void qed_cxt_qm_iids(struct qed_hwfn *p_hwfn, struct qed_qm_iids *iids) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; - u32 vf_cids = 0, type; + struct qed_tid_seg *segs; + u32 vf_cids = 0, type, j; + u32 vf_tids = 0; for (type = 0; type < MAX_CONN_TYPES; type++) { iids->cids += p_mngr->conn_cfg[type].cid_count; vf_cids += p_mngr->conn_cfg[type].cids_per_vf; + + segs = p_mngr->conn_cfg[type].tid_seg; + /* for each segment there is at most one + * protocol for which count is not 0. + */ + for (j = 0; j < NUM_TASK_PF_SEGMENTS; j++) + iids->tids += segs[j].count; + + /* The last array elelment is for the VFs. As for PF + * segments there can be only one protocol for + * which this value is not 0. + */ + vf_tids += segs[NUM_TASK_PF_SEGMENTS].count; } iids->vf_cids += vf_cids * p_mngr->vf_count; + iids->tids += vf_tids * p_mngr->vf_count; + DP_VERBOSE(p_hwfn, QED_MSG_ILT, - "iids: CIDS %08x vf_cids %08x\n", - iids->cids, iids->vf_cids); + "iids: CIDS %08x vf_cids %08x tids %08x vf_tids %08x\n", + iids->cids, iids->vf_cids, iids->tids, vf_tids); +} + +static struct qed_tid_seg *qed_cxt_tid_seg_info(struct qed_hwfn *p_hwfn, + u32 seg) +{ + struct qed_cxt_mngr *p_cfg = p_hwfn->p_cxt_mngr; + u32 i; + + /* Find the protocol with tid count > 0 for this segment. + * Note: there can only be one and this is already validated. + */ + for (i = 0; i < MAX_CONN_TYPES; i++) + if (p_cfg->conn_cfg[i].tid_seg[seg].count) + return &p_cfg->conn_cfg[i].tid_seg[seg]; + return NULL; +} + +void qed_cxt_set_srq_count(struct qed_hwfn *p_hwfn, u32 num_srqs) +{ + struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr; + + p_mgr->srq_count = num_srqs; +} + +u32 qed_cxt_get_srq_count(struct qed_hwfn *p_hwfn) +{ + struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr; + + return p_mgr->srq_count; } /* set the iids count per protocol */ @@ -188,6 +367,14 @@ static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn, p_conn->cid_count = roundup(cid_count, DQ_RANGE_ALIGN); p_conn->cids_per_vf = roundup(vf_cid_cnt, DQ_RANGE_ALIGN); + + if (type == PROTOCOLID_ROCE) { + u32 page_sz = p_mgr->clients[ILT_CLI_CDUC].p_size.val; + u32 cxt_size = CONN_CXT_SIZE(p_hwfn); + u32 elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size; + + p_conn->cid_count = roundup(p_conn->cid_count, elems_per_page); + } } u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn, @@ -200,6 +387,37 @@ u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn, return p_hwfn->p_cxt_mngr->conn_cfg[type].cid_count; } +u32 qed_cxt_get_proto_cid_start(struct qed_hwfn *p_hwfn, + enum protocol_type type) +{ + return p_hwfn->p_cxt_mngr->acquired[type].start_cid; +} + +u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn, + enum protocol_type type) +{ + u32 cnt = 0; + int i; + + for (i = 0; i < TASK_SEGMENTS; i++) + cnt += p_hwfn->p_cxt_mngr->conn_cfg[type].tid_seg[i].count; + + return cnt; +} + +static void +qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn, + enum protocol_type proto, + u8 seg, u8 seg_type, u32 count, bool has_fl) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + struct qed_tid_seg *p_seg = &p_mngr->conn_cfg[proto].tid_seg[seg]; + + p_seg->count = count; + p_seg->has_fl_mem = has_fl; + p_seg->type = seg_type; +} + static void qed_ilt_cli_blk_fill(struct qed_ilt_client_cfg *p_cli, struct qed_ilt_cli_blk *p_blk, u32 start_line, u32 total_size, @@ -241,17 +459,42 @@ static void qed_ilt_cli_adv_line(struct qed_hwfn *p_hwfn, p_blk->real_size_in_page, p_blk->start_line); } +static u32 qed_ilt_get_dynamic_line_cnt(struct qed_hwfn *p_hwfn, + enum ilt_clients ilt_client) +{ + u32 cid_count = p_hwfn->p_cxt_mngr->conn_cfg[PROTOCOLID_ROCE].cid_count; + struct qed_ilt_client_cfg *p_cli; + u32 lines_to_skip = 0; + u32 cxts_per_p; + + if (ilt_client == ILT_CLI_CDUC) { + p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_CDUC]; + + cxts_per_p = ILT_PAGE_IN_BYTES(p_cli->p_size.val) / + (u32) CONN_CXT_SIZE(p_hwfn); + + lines_to_skip = cid_count / cxts_per_p; + } + + return lines_to_skip; +} + int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + u32 curr_line, total, i, task_size, line; struct qed_ilt_client_cfg *p_cli; struct qed_ilt_cli_blk *p_blk; struct qed_cdu_iids cdu_iids; + struct qed_src_iids src_iids; struct qed_qm_iids qm_iids; - u32 curr_line, total, i; + struct qed_tm_iids tm_iids; + struct qed_tid_seg *p_seg; memset(&qm_iids, 0, sizeof(qm_iids)); memset(&cdu_iids, 0, sizeof(cdu_iids)); + memset(&src_iids, 0, sizeof(src_iids)); + memset(&tm_iids, 0, sizeof(tm_iids)); p_mngr->pf_start_line = RESC_START(p_hwfn, QED_ILT); @@ -279,6 +522,9 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn) qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, ILT_CLI_CDUC); p_cli->pf_total_lines = curr_line - p_blk->start_line; + p_blk->dynamic_line_cnt = qed_ilt_get_dynamic_line_cnt(p_hwfn, + ILT_CLI_CDUC); + /* CDUC VF */ p_blk = &p_cli->vf_blks[CDUC_BLK]; total = cdu_iids.per_vf_cids * CONN_CXT_SIZE(p_hwfn); @@ -293,21 +539,128 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn) qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, ILT_CLI_CDUC); + /* CDUT PF */ + p_cli = &p_mngr->clients[ILT_CLI_CDUT]; + p_cli->first.val = curr_line; + + /* first the 'working' task memory */ + for (i = 0; i < NUM_TASK_PF_SEGMENTS; i++) { + p_seg = qed_cxt_tid_seg_info(p_hwfn, i); + if (!p_seg || p_seg->count == 0) + continue; + + p_blk = &p_cli->pf_blks[CDUT_SEG_BLK(i)]; + total = p_seg->count * p_mngr->task_type_size[p_seg->type]; + qed_ilt_cli_blk_fill(p_cli, p_blk, curr_line, total, + p_mngr->task_type_size[p_seg->type]); + + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, + ILT_CLI_CDUT); + } + + /* next the 'init' task memory (forced load memory) */ + for (i = 0; i < NUM_TASK_PF_SEGMENTS; i++) { + p_seg = qed_cxt_tid_seg_info(p_hwfn, i); + if (!p_seg || p_seg->count == 0) + continue; + + p_blk = &p_cli->pf_blks[CDUT_FL_SEG_BLK(i, PF)]; + + if (!p_seg->has_fl_mem) { + /* The segment is active (total size pf 'working' + * memory is > 0) but has no FL (forced-load, Init) + * memory. Thus: + * + * 1. The total-size in the corrsponding FL block of + * the ILT client is set to 0 - No ILT line are + * provisioned and no ILT memory allocated. + * + * 2. The start-line of said block is set to the + * start line of the matching working memory + * block in the ILT client. This is later used to + * configure the CDU segment offset registers and + * results in an FL command for TIDs of this + * segement behaves as regular load commands + * (loading TIDs from the working memory). + */ + line = p_cli->pf_blks[CDUT_SEG_BLK(i)].start_line; + + qed_ilt_cli_blk_fill(p_cli, p_blk, line, 0, 0); + continue; + } + total = p_seg->count * p_mngr->task_type_size[p_seg->type]; + + qed_ilt_cli_blk_fill(p_cli, p_blk, + curr_line, total, + p_mngr->task_type_size[p_seg->type]); + + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, + ILT_CLI_CDUT); + } + p_cli->pf_total_lines = curr_line - p_cli->pf_blks[0].start_line; + + /* CDUT VF */ + p_seg = qed_cxt_tid_seg_info(p_hwfn, TASK_SEGMENT_VF); + if (p_seg && p_seg->count) { + /* Stricly speaking we need to iterate over all VF + * task segment types, but a VF has only 1 segment + */ + + /* 'working' memory */ + total = p_seg->count * p_mngr->task_type_size[p_seg->type]; + + p_blk = &p_cli->vf_blks[CDUT_SEG_BLK(0)]; + qed_ilt_cli_blk_fill(p_cli, p_blk, + curr_line, total, + p_mngr->task_type_size[p_seg->type]); + + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, + ILT_CLI_CDUT); + + /* 'init' memory */ + p_blk = &p_cli->vf_blks[CDUT_FL_SEG_BLK(0, VF)]; + if (!p_seg->has_fl_mem) { + /* see comment above */ + line = p_cli->vf_blks[CDUT_SEG_BLK(0)].start_line; + qed_ilt_cli_blk_fill(p_cli, p_blk, line, 0, 0); + } else { + task_size = p_mngr->task_type_size[p_seg->type]; + qed_ilt_cli_blk_fill(p_cli, p_blk, + curr_line, total, task_size); + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, + ILT_CLI_CDUT); + } + p_cli->vf_total_lines = curr_line - + p_cli->vf_blks[0].start_line; + + /* Now for the rest of the VFs */ + for (i = 1; i < p_mngr->vf_count; i++) { + p_blk = &p_cli->vf_blks[CDUT_SEG_BLK(0)]; + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, + ILT_CLI_CDUT); + + p_blk = &p_cli->vf_blks[CDUT_FL_SEG_BLK(0, VF)]; + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, + ILT_CLI_CDUT); + } + } + /* QM */ p_cli = &p_mngr->clients[ILT_CLI_QM]; p_blk = &p_cli->pf_blks[0]; qed_cxt_qm_iids(p_hwfn, &qm_iids); total = qed_qm_pf_mem_size(p_hwfn->rel_pf_id, qm_iids.cids, - qm_iids.vf_cids, 0, + qm_iids.vf_cids, qm_iids.tids, p_hwfn->qm_info.num_pqs, p_hwfn->qm_info.num_vf_pqs); DP_VERBOSE(p_hwfn, QED_MSG_ILT, - "QM ILT Info, (cids=%d, vf_cids=%d, num_pqs=%d, num_vf_pqs=%d, memory_size=%d)\n", + "QM ILT Info, (cids=%d, vf_cids=%d, tids=%d, num_pqs=%d, num_vf_pqs=%d, memory_size=%d)\n", qm_iids.cids, qm_iids.vf_cids, + qm_iids.tids, p_hwfn->qm_info.num_pqs, p_hwfn->qm_info.num_vf_pqs, total); qed_ilt_cli_blk_fill(p_cli, p_blk, @@ -317,6 +670,75 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn) qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, ILT_CLI_QM); p_cli->pf_total_lines = curr_line - p_blk->start_line; + /* SRC */ + p_cli = &p_mngr->clients[ILT_CLI_SRC]; + qed_cxt_src_iids(p_mngr, &src_iids); + + /* Both the PF and VFs searcher connections are stored in the per PF + * database. Thus sum the PF searcher cids and all the VFs searcher + * cids. + */ + total = src_iids.pf_cids + src_iids.per_vf_cids * p_mngr->vf_count; + if (total) { + u32 local_max = max_t(u32, total, + SRC_MIN_NUM_ELEMS); + + total = roundup_pow_of_two(local_max); + + p_blk = &p_cli->pf_blks[0]; + qed_ilt_cli_blk_fill(p_cli, p_blk, curr_line, + total * sizeof(struct src_ent), + sizeof(struct src_ent)); + + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, + ILT_CLI_SRC); + p_cli->pf_total_lines = curr_line - p_blk->start_line; + } + + /* TM PF */ + p_cli = &p_mngr->clients[ILT_CLI_TM]; + qed_cxt_tm_iids(p_mngr, &tm_iids); + total = tm_iids.pf_cids + tm_iids.pf_tids_total; + if (total) { + p_blk = &p_cli->pf_blks[0]; + qed_ilt_cli_blk_fill(p_cli, p_blk, curr_line, + total * TM_ELEM_SIZE, TM_ELEM_SIZE); + + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, + ILT_CLI_TM); + p_cli->pf_total_lines = curr_line - p_blk->start_line; + } + + /* TM VF */ + total = tm_iids.per_vf_cids + tm_iids.per_vf_tids; + if (total) { + p_blk = &p_cli->vf_blks[0]; + qed_ilt_cli_blk_fill(p_cli, p_blk, curr_line, + total * TM_ELEM_SIZE, TM_ELEM_SIZE); + + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, + ILT_CLI_TM); + p_cli->pf_total_lines = curr_line - p_blk->start_line; + + for (i = 1; i < p_mngr->vf_count; i++) + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, + ILT_CLI_TM); + } + + /* TSDM (SRQ CONTEXT) */ + total = qed_cxt_get_srq_count(p_hwfn); + + if (total) { + p_cli = &p_mngr->clients[ILT_CLI_TSDM]; + p_blk = &p_cli->pf_blks[SRQ_BLK]; + qed_ilt_cli_blk_fill(p_cli, p_blk, curr_line, + total * SRQ_CXT_SIZE, SRQ_CXT_SIZE); + + qed_ilt_cli_adv_line(p_hwfn, p_cli, p_blk, &curr_line, + ILT_CLI_TSDM); + p_cli->pf_total_lines = curr_line - p_blk->start_line; + } + if (curr_line - p_hwfn->p_cxt_mngr->pf_start_line > RESC_NUM(p_hwfn, QED_ILT)) { DP_ERR(p_hwfn, "too many ilt lines...#lines=%d\n", @@ -327,8 +749,122 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn) return 0; } +static void qed_cxt_src_t2_free(struct qed_hwfn *p_hwfn) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + u32 i; + + if (!p_mngr->t2) + return; + + for (i = 0; i < p_mngr->t2_num_pages; i++) + if (p_mngr->t2[i].p_virt) + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + p_mngr->t2[i].size, + p_mngr->t2[i].p_virt, + p_mngr->t2[i].p_phys); + + kfree(p_mngr->t2); + p_mngr->t2 = NULL; +} + +static int qed_cxt_src_t2_alloc(struct qed_hwfn *p_hwfn) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + u32 conn_num, total_size, ent_per_page, psz, i; + struct qed_ilt_client_cfg *p_src; + struct qed_src_iids src_iids; + struct qed_dma_mem *p_t2; + int rc; + + memset(&src_iids, 0, sizeof(src_iids)); + + /* if the SRC ILT client is inactive - there are no connection + * requiring the searcer, leave. + */ + p_src = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_SRC]; + if (!p_src->active) + return 0; + + qed_cxt_src_iids(p_mngr, &src_iids); + conn_num = src_iids.pf_cids + src_iids.per_vf_cids * p_mngr->vf_count; + total_size = conn_num * sizeof(struct src_ent); + + /* use the same page size as the SRC ILT client */ + psz = ILT_PAGE_IN_BYTES(p_src->p_size.val); + p_mngr->t2_num_pages = DIV_ROUND_UP(total_size, psz); + + /* allocate t2 */ + p_mngr->t2 = kzalloc(p_mngr->t2_num_pages * sizeof(struct qed_dma_mem), + GFP_KERNEL); + if (!p_mngr->t2) { + DP_NOTICE(p_hwfn, "Failed to allocate t2 table\n"); + rc = -ENOMEM; + goto t2_fail; + } + + /* allocate t2 pages */ + for (i = 0; i < p_mngr->t2_num_pages; i++) { + u32 size = min_t(u32, total_size, psz); + void **p_virt = &p_mngr->t2[i].p_virt; + + *p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + size, + &p_mngr->t2[i].p_phys, GFP_KERNEL); + if (!p_mngr->t2[i].p_virt) { + rc = -ENOMEM; + goto t2_fail; + } + memset(*p_virt, 0, size); + p_mngr->t2[i].size = size; + total_size -= size; + } + + /* Set the t2 pointers */ + + /* entries per page - must be a power of two */ + ent_per_page = psz / sizeof(struct src_ent); + + p_mngr->first_free = (u64) p_mngr->t2[0].p_phys; + + p_t2 = &p_mngr->t2[(conn_num - 1) / ent_per_page]; + p_mngr->last_free = (u64) p_t2->p_phys + + ((conn_num - 1) & (ent_per_page - 1)) * sizeof(struct src_ent); + + for (i = 0; i < p_mngr->t2_num_pages; i++) { + u32 ent_num = min_t(u32, + ent_per_page, + conn_num); + struct src_ent *entries = p_mngr->t2[i].p_virt; + u64 p_ent_phys = (u64) p_mngr->t2[i].p_phys, val; + u32 j; + + for (j = 0; j < ent_num - 1; j++) { + val = p_ent_phys + (j + 1) * sizeof(struct src_ent); + entries[j].next = cpu_to_be64(val); + } + + if (i < p_mngr->t2_num_pages - 1) + val = (u64) p_mngr->t2[i + 1].p_phys; + else + val = 0; + entries[j].next = cpu_to_be64(val); + + conn_num -= ent_per_page; + } + + return 0; + +t2_fail: + qed_cxt_src_t2_free(p_hwfn); + return rc; +} + #define for_each_ilt_valid_client(pos, clients) \ - for (pos = 0; pos < ILT_CLI_MAX; pos++) + for (pos = 0; pos < ILT_CLI_MAX; pos++) \ + if (!clients[pos].active) { \ + continue; \ + } else \ /* Total number of ILT lines used by this PF */ static u32 qed_cxt_ilt_shadow_size(struct qed_ilt_client_cfg *ilt_clients) @@ -336,12 +872,8 @@ static u32 qed_cxt_ilt_shadow_size(struct qed_ilt_client_cfg *ilt_clients) u32 size = 0; u32 i; - for_each_ilt_valid_client(i, ilt_clients) { - if (!ilt_clients[i].active) - continue; - size += (ilt_clients[i].last.val - - ilt_clients[i].first.val + 1); - } + for_each_ilt_valid_client(i, ilt_clients) + size += (ilt_clients[i].last.val - ilt_clients[i].first.val + 1); return size; } @@ -372,15 +904,22 @@ static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn, u32 start_line_offset) { struct qed_dma_mem *ilt_shadow = p_hwfn->p_cxt_mngr->ilt_shadow; - u32 lines, line, sz_left; + u32 lines, line, sz_left, lines_to_skip = 0; + + /* Special handling for RoCE that supports dynamic allocation */ + if ((p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) && + ((ilt_client == ILT_CLI_CDUT) || ilt_client == ILT_CLI_TSDM)) + return 0; + + lines_to_skip = p_blk->dynamic_line_cnt; if (!p_blk->total_size) return 0; sz_left = p_blk->total_size; - lines = DIV_ROUND_UP(sz_left, p_blk->real_size_in_page); + lines = DIV_ROUND_UP(sz_left, p_blk->real_size_in_page) - lines_to_skip; line = p_blk->start_line + start_line_offset - - p_hwfn->p_cxt_mngr->pf_start_line; + p_hwfn->p_cxt_mngr->pf_start_line + lines_to_skip; for (; lines; lines--) { dma_addr_t p_phys; @@ -434,8 +973,6 @@ static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn) (u32)(size * sizeof(struct qed_dma_mem))); for_each_ilt_valid_client(i, clients) { - if (!clients[i].active) - continue; for (j = 0; j < ILT_CLI_PF_BLOCKS; j++) { p_blk = &clients[i].pf_blks[j]; rc = qed_ilt_blk_alloc(p_hwfn, p_blk, i, 0); @@ -514,6 +1051,7 @@ cid_map_fail: int qed_cxt_mngr_alloc(struct qed_hwfn *p_hwfn) { + struct qed_ilt_client_cfg *clients; struct qed_cxt_mngr *p_mngr; u32 i; @@ -524,20 +1062,42 @@ int qed_cxt_mngr_alloc(struct qed_hwfn *p_hwfn) } /* Initialize ILT client registers */ - p_mngr->clients[ILT_CLI_CDUC].first.reg = ILT_CFG_REG(CDUC, FIRST_ILT); - p_mngr->clients[ILT_CLI_CDUC].last.reg = ILT_CFG_REG(CDUC, LAST_ILT); - p_mngr->clients[ILT_CLI_CDUC].p_size.reg = ILT_CFG_REG(CDUC, P_SIZE); - - p_mngr->clients[ILT_CLI_QM].first.reg = ILT_CFG_REG(QM, FIRST_ILT); - p_mngr->clients[ILT_CLI_QM].last.reg = ILT_CFG_REG(QM, LAST_ILT); - p_mngr->clients[ILT_CLI_QM].p_size.reg = ILT_CFG_REG(QM, P_SIZE); - + clients = p_mngr->clients; + clients[ILT_CLI_CDUC].first.reg = ILT_CFG_REG(CDUC, FIRST_ILT); + clients[ILT_CLI_CDUC].last.reg = ILT_CFG_REG(CDUC, LAST_ILT); + clients[ILT_CLI_CDUC].p_size.reg = ILT_CFG_REG(CDUC, P_SIZE); + + clients[ILT_CLI_QM].first.reg = ILT_CFG_REG(QM, FIRST_ILT); + clients[ILT_CLI_QM].last.reg = ILT_CFG_REG(QM, LAST_ILT); + clients[ILT_CLI_QM].p_size.reg = ILT_CFG_REG(QM, P_SIZE); + + clients[ILT_CLI_TM].first.reg = ILT_CFG_REG(TM, FIRST_ILT); + clients[ILT_CLI_TM].last.reg = ILT_CFG_REG(TM, LAST_ILT); + clients[ILT_CLI_TM].p_size.reg = ILT_CFG_REG(TM, P_SIZE); + + clients[ILT_CLI_SRC].first.reg = ILT_CFG_REG(SRC, FIRST_ILT); + clients[ILT_CLI_SRC].last.reg = ILT_CFG_REG(SRC, LAST_ILT); + clients[ILT_CLI_SRC].p_size.reg = ILT_CFG_REG(SRC, P_SIZE); + + clients[ILT_CLI_CDUT].first.reg = ILT_CFG_REG(CDUT, FIRST_ILT); + clients[ILT_CLI_CDUT].last.reg = ILT_CFG_REG(CDUT, LAST_ILT); + clients[ILT_CLI_CDUT].p_size.reg = ILT_CFG_REG(CDUT, P_SIZE); + + clients[ILT_CLI_TSDM].first.reg = ILT_CFG_REG(TSDM, FIRST_ILT); + clients[ILT_CLI_TSDM].last.reg = ILT_CFG_REG(TSDM, LAST_ILT); + clients[ILT_CLI_TSDM].p_size.reg = ILT_CFG_REG(TSDM, P_SIZE); /* default ILT page size for all clients is 32K */ for (i = 0; i < ILT_CLI_MAX; i++) p_mngr->clients[i].p_size.val = ILT_DEFAULT_HW_P_SIZE; + /* Initialize task sizes */ + p_mngr->task_type_size[0] = TYPE0_TASK_CXT_SIZE(p_hwfn); + p_mngr->task_type_size[1] = TYPE1_TASK_CXT_SIZE(p_hwfn); + if (p_hwfn->cdev->p_iov_info) p_mngr->vf_count = p_hwfn->cdev->p_iov_info->total_vfs; + /* Initialize the dynamic ILT allocation mutex */ + mutex_init(&p_mngr->mutex); /* Set the cxt mangr pointer priori to further allocations */ p_hwfn->p_cxt_mngr = p_mngr; @@ -556,6 +1116,13 @@ int qed_cxt_tables_alloc(struct qed_hwfn *p_hwfn) goto tables_alloc_fail; } + /* Allocate the T2 table */ + rc = qed_cxt_src_t2_alloc(p_hwfn); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to allocate T2 memory\n"); + goto tables_alloc_fail; + } + /* Allocate and initialize the acquired cids bitmaps */ rc = qed_cid_map_alloc(p_hwfn); if (rc) { @@ -576,6 +1143,7 @@ void qed_cxt_mngr_free(struct qed_hwfn *p_hwfn) return; qed_cid_map_free(p_hwfn); + qed_cxt_src_t2_free(p_hwfn); qed_ilt_shadow_free(p_hwfn); kfree(p_hwfn->p_cxt_mngr); @@ -620,6 +1188,48 @@ void qed_cxt_mngr_setup(struct qed_hwfn *p_hwfn) #define CDUC_NCIB_MASK \ (CDU_REG_CID_ADDR_PARAMS_NCIB >> CDUC_NCIB_SHIFT) +#define CDUT_TYPE0_CXT_SIZE_SHIFT \ + CDU_REG_SEGMENT0_PARAMS_T0_TID_SIZE_SHIFT + +#define CDUT_TYPE0_CXT_SIZE_MASK \ + (CDU_REG_SEGMENT0_PARAMS_T0_TID_SIZE >> \ + CDUT_TYPE0_CXT_SIZE_SHIFT) + +#define CDUT_TYPE0_BLOCK_WASTE_SHIFT \ + CDU_REG_SEGMENT0_PARAMS_T0_TID_BLOCK_WASTE_SHIFT + +#define CDUT_TYPE0_BLOCK_WASTE_MASK \ + (CDU_REG_SEGMENT0_PARAMS_T0_TID_BLOCK_WASTE >> \ + CDUT_TYPE0_BLOCK_WASTE_SHIFT) + +#define CDUT_TYPE0_NCIB_SHIFT \ + CDU_REG_SEGMENT0_PARAMS_T0_NUM_TIDS_IN_BLOCK_SHIFT + +#define CDUT_TYPE0_NCIB_MASK \ + (CDU_REG_SEGMENT0_PARAMS_T0_NUM_TIDS_IN_BLOCK >> \ + CDUT_TYPE0_NCIB_SHIFT) + +#define CDUT_TYPE1_CXT_SIZE_SHIFT \ + CDU_REG_SEGMENT1_PARAMS_T1_TID_SIZE_SHIFT + +#define CDUT_TYPE1_CXT_SIZE_MASK \ + (CDU_REG_SEGMENT1_PARAMS_T1_TID_SIZE >> \ + CDUT_TYPE1_CXT_SIZE_SHIFT) + +#define CDUT_TYPE1_BLOCK_WASTE_SHIFT \ + CDU_REG_SEGMENT1_PARAMS_T1_TID_BLOCK_WASTE_SHIFT + +#define CDUT_TYPE1_BLOCK_WASTE_MASK \ + (CDU_REG_SEGMENT1_PARAMS_T1_TID_BLOCK_WASTE >> \ + CDUT_TYPE1_BLOCK_WASTE_SHIFT) + +#define CDUT_TYPE1_NCIB_SHIFT \ + CDU_REG_SEGMENT1_PARAMS_T1_NUM_TIDS_IN_BLOCK_SHIFT + +#define CDUT_TYPE1_NCIB_MASK \ + (CDU_REG_SEGMENT1_PARAMS_T1_NUM_TIDS_IN_BLOCK >> \ + CDUT_TYPE1_NCIB_SHIFT) + static void qed_cdu_init_common(struct qed_hwfn *p_hwfn) { u32 page_sz, elems_per_page, block_waste, cxt_size, cdu_params = 0; @@ -634,6 +1244,92 @@ static void qed_cdu_init_common(struct qed_hwfn *p_hwfn) SET_FIELD(cdu_params, CDUC_BLOCK_WASTE, block_waste); SET_FIELD(cdu_params, CDUC_NCIB, elems_per_page); STORE_RT_REG(p_hwfn, CDU_REG_CID_ADDR_PARAMS_RT_OFFSET, cdu_params); + + /* CDUT - type-0 tasks configuration */ + page_sz = p_hwfn->p_cxt_mngr->clients[ILT_CLI_CDUT].p_size.val; + cxt_size = p_hwfn->p_cxt_mngr->task_type_size[0]; + elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size; + block_waste = ILT_PAGE_IN_BYTES(page_sz) - elems_per_page * cxt_size; + + /* cxt size and block-waste are multipes of 8 */ + cdu_params = 0; + SET_FIELD(cdu_params, CDUT_TYPE0_CXT_SIZE, (cxt_size >> 3)); + SET_FIELD(cdu_params, CDUT_TYPE0_BLOCK_WASTE, (block_waste >> 3)); + SET_FIELD(cdu_params, CDUT_TYPE0_NCIB, elems_per_page); + STORE_RT_REG(p_hwfn, CDU_REG_SEGMENT0_PARAMS_RT_OFFSET, cdu_params); + + /* CDUT - type-1 tasks configuration */ + cxt_size = p_hwfn->p_cxt_mngr->task_type_size[1]; + elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size; + block_waste = ILT_PAGE_IN_BYTES(page_sz) - elems_per_page * cxt_size; + + /* cxt size and block-waste are multipes of 8 */ + cdu_params = 0; + SET_FIELD(cdu_params, CDUT_TYPE1_CXT_SIZE, (cxt_size >> 3)); + SET_FIELD(cdu_params, CDUT_TYPE1_BLOCK_WASTE, (block_waste >> 3)); + SET_FIELD(cdu_params, CDUT_TYPE1_NCIB, elems_per_page); + STORE_RT_REG(p_hwfn, CDU_REG_SEGMENT1_PARAMS_RT_OFFSET, cdu_params); +} + +/* CDU PF */ +#define CDU_SEG_REG_TYPE_SHIFT CDU_SEG_TYPE_OFFSET_REG_TYPE_SHIFT +#define CDU_SEG_REG_TYPE_MASK 0x1 +#define CDU_SEG_REG_OFFSET_SHIFT 0 +#define CDU_SEG_REG_OFFSET_MASK CDU_SEG_TYPE_OFFSET_REG_OFFSET_MASK + +static void qed_cdu_init_pf(struct qed_hwfn *p_hwfn) +{ + struct qed_ilt_client_cfg *p_cli; + struct qed_tid_seg *p_seg; + u32 cdu_seg_params, offset; + int i; + + static const u32 rt_type_offset_arr[] = { + CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET, + CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET, + CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET, + CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET + }; + + static const u32 rt_type_offset_fl_arr[] = { + CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET, + CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET, + CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET, + CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET + }; + + p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_CDUT]; + + /* There are initializations only for CDUT during pf Phase */ + for (i = 0; i < NUM_TASK_PF_SEGMENTS; i++) { + /* Segment 0 */ + p_seg = qed_cxt_tid_seg_info(p_hwfn, i); + if (!p_seg) + continue; + + /* Note: start_line is already adjusted for the CDU + * segment register granularity, so we just need to + * divide. Adjustment is implicit as we assume ILT + * Page size is larger than 32K! + */ + offset = (ILT_PAGE_IN_BYTES(p_cli->p_size.val) * + (p_cli->pf_blks[CDUT_SEG_BLK(i)].start_line - + p_cli->first.val)) / CDUT_SEG_ALIGNMET_IN_BYTES; + + cdu_seg_params = 0; + SET_FIELD(cdu_seg_params, CDU_SEG_REG_TYPE, p_seg->type); + SET_FIELD(cdu_seg_params, CDU_SEG_REG_OFFSET, offset); + STORE_RT_REG(p_hwfn, rt_type_offset_arr[i], cdu_seg_params); + + offset = (ILT_PAGE_IN_BYTES(p_cli->p_size.va |