diff options
Diffstat (limited to 'net/smc')
-rw-r--r-- | net/smc/af_smc.c | 125 | ||||
-rw-r--r-- | net/smc/smc.h | 6 | ||||
-rw-r--r-- | net/smc/smc_cdc.c | 52 | ||||
-rw-r--r-- | net/smc/smc_cdc.h | 62 | ||||
-rw-r--r-- | net/smc/smc_clc.c | 2 | ||||
-rw-r--r-- | net/smc/smc_close.c | 16 | ||||
-rw-r--r-- | net/smc/smc_core.c | 17 | ||||
-rw-r--r-- | net/smc/smc_core.h | 20 | ||||
-rw-r--r-- | net/smc/smc_diag.c | 3 | ||||
-rw-r--r-- | net/smc/smc_ib.c | 31 | ||||
-rw-r--r-- | net/smc/smc_ib.h | 2 | ||||
-rw-r--r-- | net/smc/smc_llc.c | 3 | ||||
-rw-r--r-- | net/smc/smc_netns.h | 20 | ||||
-rw-r--r-- | net/smc/smc_pnet.c | 671 | ||||
-rw-r--r-- | net/smc/smc_pnet.h | 13 | ||||
-rw-r--r-- | net/smc/smc_tx.c | 81 | ||||
-rw-r--r-- | net/smc/smc_wr.c | 46 | ||||
-rw-r--r-- | net/smc/smc_wr.h | 1 |
18 files changed, 790 insertions, 381 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c4da4a78d369..77ef53596d18 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -30,6 +30,10 @@ #include <net/smc.h> #include <asm/ioctls.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include "smc_netns.h" + #include "smc.h" #include "smc_clc.h" #include "smc_llc.h" @@ -42,8 +46,11 @@ #include "smc_rx.h" #include "smc_close.h" -static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group - * creation +static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group + * creation on server + */ +static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group + * creation on client */ static void smc_tcp_listen_work(struct work_struct *); @@ -145,32 +152,35 @@ static int smc_release(struct socket *sock) rc = smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; - } - if (smc->clcsock) { - if (smc->use_fallback && sk->sk_state == SMC_LISTEN) { + } else { + if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) + sock_put(sk); /* passive closing */ + if (sk->sk_state == SMC_LISTEN) { /* wake up clcsock accept */ rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); } - mutex_lock(&smc->clcsock_release_lock); - sock_release(smc->clcsock); - smc->clcsock = NULL; - mutex_unlock(&smc->clcsock_release_lock); - } - if (smc->use_fallback) { - if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) - sock_put(sk); /* passive closing */ sk->sk_state = SMC_CLOSED; sk->sk_state_change(sk); } + sk->sk_prot->unhash(sk); + + if (sk->sk_state == SMC_CLOSED) { + if (smc->clcsock) { + mutex_lock(&smc->clcsock_release_lock); + sock_release(smc->clcsock); + smc->clcsock = NULL; + mutex_unlock(&smc->clcsock_release_lock); + } + if (!smc->use_fallback) + smc_conn_free(&smc->conn); + } + /* detach socket */ sock_orphan(sk); sock->sk = NULL; - if (!smc->use_fallback && sk->sk_state == SMC_CLOSED) - smc_conn_free(&smc->conn); release_sock(sk); - sk->sk_prot->unhash(sk); sock_put(sk); /* final sock_put */ out: return rc; @@ -289,7 +299,8 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, (1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_WIFI_STATUS) | \ (1UL << SOCK_NOFCS) | \ - (1UL << SOCK_FILTER_LOCKED)) + (1UL << SOCK_FILTER_LOCKED) | \ + (1UL << SOCK_TSTAMP_NEW)) /* copy only relevant settings and flags of SOL_SOCKET level from smc to * clc socket (since smc is not called for these options from net/core) */ @@ -473,7 +484,12 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code, { if (local_contact == SMC_FIRST_CONTACT) smc_lgr_forget(smc->conn.lgr); - mutex_unlock(&smc_create_lgr_pending); + if (smc->conn.lgr->is_smcd) + /* there is only one lgr role for SMC-D; use server lock */ + mutex_unlock(&smc_server_lgr_pending); + else + mutex_unlock(&smc_client_lgr_pending); + smc_conn_free(&smc->conn); return reason_code; } @@ -558,7 +574,7 @@ static int smc_connect_rdma(struct smc_sock *smc, struct smc_link *link; int reason_code = 0; - mutex_lock(&smc_create_lgr_pending); + mutex_lock(&smc_client_lgr_pending); local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev, ibport, ntoh24(aclc->qpn), &aclc->lcl, NULL, 0); @@ -569,7 +585,8 @@ static int smc_connect_rdma(struct smc_sock *smc, reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */ else reason_code = SMC_CLC_DECL_INTERR; /* other error */ - return smc_connect_abort(smc, reason_code, 0); + mutex_unlock(&smc_client_lgr_pending); + return reason_code; } link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK]; @@ -613,7 +630,7 @@ static int smc_connect_rdma(struct smc_sock *smc, return smc_connect_abort(smc, reason_code, local_contact); } - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_client_lgr_pending); smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@ -630,11 +647,14 @@ static int smc_connect_ism(struct smc_sock *smc, int local_contact = SMC_FIRST_CONTACT; int rc = 0; - mutex_lock(&smc_create_lgr_pending); + /* there is only one lgr role for SMC-D; use server lock */ + mutex_lock(&smc_server_lgr_pending); local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0, NULL, ismdev, aclc->gid); - if (local_contact < 0) - return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0); + if (local_contact < 0) { + mutex_unlock(&smc_server_lgr_pending); + return SMC_CLC_DECL_MEM; + } /* Create send and receive buffers */ if (smc_buf_create(smc, true)) @@ -648,7 +668,7 @@ static int smc_connect_ism(struct smc_sock *smc, rc = smc_clc_send_confirm(smc); if (rc) return smc_connect_abort(smc, rc, local_contact); - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@ -1247,7 +1267,7 @@ static void smc_listen_work(struct work_struct *work) return; } - mutex_lock(&smc_create_lgr_pending); + mutex_lock(&smc_server_lgr_pending); smc_close_init(new_smc); smc_rx_init(new_smc); smc_tx_init(new_smc); @@ -1269,7 +1289,7 @@ static void smc_listen_work(struct work_struct *work) &local_contact) || smc_listen_rdma_reg(new_smc, local_contact))) { /* SMC not supported, decline */ - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP, local_contact); return; @@ -1278,29 +1298,33 @@ static void smc_listen_work(struct work_struct *work) /* send SMC Accept CLC message */ rc = smc_clc_send_accept(new_smc, local_contact); if (rc) { - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, rc, local_contact); return; } + /* SMC-D does not need this lock any more */ + if (ism_supported) + mutex_unlock(&smc_server_lgr_pending); + /* receive SMC Confirm CLC message */ reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), SMC_CLC_CONFIRM, CLC_WAIT_TIME); if (reason_code) { - mutex_unlock(&smc_create_lgr_pending); + if (!ism_supported) + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, reason_code, local_contact); return; } /* finish worker */ if (!ism_supported) { - if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) { - mutex_unlock(&smc_create_lgr_pending); + rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact); + mutex_unlock(&smc_server_lgr_pending); + if (rc) return; - } } smc_conn_save_peer_info(new_smc, &cclc); - mutex_unlock(&smc_create_lgr_pending); smc_listen_out_connected(new_smc); } @@ -1503,6 +1527,11 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, smc = smc_sk(sk); lock_sock(sk); + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) || (sk->sk_state == SMC_CLOSED)) @@ -1838,7 +1867,11 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, smc = smc_sk(sk); lock_sock(sk); - + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if (sk->sk_state == SMC_INIT || sk->sk_state == SMC_LISTEN || sk->sk_state == SMC_CLOSED) @@ -1937,10 +1970,33 @@ static const struct net_proto_family smc_sock_family_ops = { .create = smc_create, }; +unsigned int smc_net_id; + +static __net_init int smc_net_init(struct net *net) +{ + return smc_pnet_net_init(net); +} + +static void __net_exit smc_net_exit(struct net *net) +{ + smc_pnet_net_exit(net); +} + +static struct pernet_operations smc_net_ops = { + .init = smc_net_init, + .exit = smc_net_exit, + .id = &smc_net_id, + .size = sizeof(struct smc_net), +}; + static int __init smc_init(void) { int rc; + rc = register_pernet_subsys(&smc_net_ops); + if (rc) + return rc; + rc = smc_pnet_init(); if (rc) return rc; @@ -2006,6 +2062,7 @@ static void __exit smc_exit(void) proto_unregister(&smc_proto6); proto_unregister(&smc_proto); smc_pnet_exit(); + unregister_pernet_subsys(&smc_net_ops); } module_init(smc_init); diff --git a/net/smc/smc.h b/net/smc/smc.h index 5721416d0605..adbdf195eb08 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -113,9 +113,9 @@ struct smc_host_cdc_msg { /* Connection Data Control message */ } __aligned(8); enum smc_urg_state { - SMC_URG_VALID, /* data present */ - SMC_URG_NOTYET, /* data pending */ - SMC_URG_READ /* data was already read */ + SMC_URG_VALID = 1, /* data present */ + SMC_URG_NOTYET = 2, /* data pending */ + SMC_URG_READ = 3, /* data was already read */ }; struct smc_connection { diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index db83332ac1c8..d0b0f4c865b4 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -21,13 +21,6 @@ /********************************** send *************************************/ -struct smc_cdc_tx_pend { - struct smc_connection *conn; /* socket connection */ - union smc_host_cursor cursor; /* tx sndbuf cursor sent */ - union smc_host_cursor p_cursor; /* rx RMBE cursor produced */ - u16 ctrl_seq; /* conn. tx sequence # */ -}; - /* handler for send/transmission completion of a CDC msg */ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, struct smc_link *link, @@ -61,12 +54,14 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend) { struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; int rc; rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, + wr_rdma_buf, (struct smc_wr_tx_pend_priv **)pend); if (!conn->alert_token_local) /* abnormal termination */ @@ -96,6 +91,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend) { + union smc_host_cursor cfed; struct smc_link *link; int rc; @@ -105,12 +101,12 @@ int smc_cdc_msg_send(struct smc_connection *conn, conn->tx_cdc_seq++; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; - smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, - &conn->local_tx_ctrl, conn); + smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); - if (!rc) - smc_curs_copy(&conn->rx_curs_confirmed, - &conn->local_tx_ctrl.cons, conn); + if (!rc) { + smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; + } return rc; } @@ -121,11 +117,14 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) struct smc_wr_buf *wr_buf; int rc; - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend); if (rc) return rc; - return smc_cdc_msg_send(conn, wr_buf, pend); + spin_lock_bh(&conn->send_lock); + rc = smc_cdc_msg_send(conn, wr_buf, pend); + spin_unlock_bh(&conn->send_lock); + return rc; } int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) @@ -195,6 +194,7 @@ int smcd_cdc_msg_send(struct smc_connection *conn) if (rc) return rc; smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn); + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; /* Calculate transmitted data and increment free send buffer space */ diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, &conn->tx_curs_sent); @@ -271,26 +271,18 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smp_mb__after_atomic(); smc->sk.sk_data_ready(&smc->sk); } else { - if (conn->local_rx_ctrl.prod_flags.write_blocked || - conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || - conn->local_rx_ctrl.prod_flags.urg_data_pending) { - if (conn->local_rx_ctrl.prod_flags.urg_data_pending) - conn->urg_state = SMC_URG_NOTYET; - /* force immediate tx of current consumer cursor, but - * under send_lock to guarantee arrival in seqno-order - */ - if (smc->sk.sk_state != SMC_INIT) - smc_tx_sndbuf_nonempty(conn); - } + if (conn->local_rx_ctrl.prod_flags.write_blocked) + smc->sk.sk_data_ready(&smc->sk); + if (conn->local_rx_ctrl.prod_flags.urg_data_pending) + conn->urg_state = SMC_URG_NOTYET; } - /* piggy backed tx info */ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ - if (diff_cons && smc_tx_prepared_sends(conn)) { + if ((diff_cons && smc_tx_prepared_sends(conn)) || + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || + conn->local_rx_ctrl.prod_flags.urg_data_pending) smc_tx_sndbuf_nonempty(conn); - /* trigger socket release if connection closed */ - smc_close_wake_tx_prepared(smc); - } + if (diff_cons && conn->urg_tx_pend && atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) { /* urg data confirmed by peer, indicate we're ready for more */ diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index b5bfe38c7f9b..861dc24c588c 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -160,7 +160,9 @@ static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt, #endif } -/* calculate cursor difference between old and new, where old <= new */ +/* calculate cursor difference between old and new, where old <= new and + * difference cannot exceed size + */ static inline int smc_curs_diff(unsigned int size, union smc_host_cursor *old, union smc_host_cursor *new) @@ -185,28 +187,51 @@ static inline int smc_curs_comp(unsigned int size, return smc_curs_diff(size, old, new); } +/* calculate cursor difference between old and new, where old <= new and + * difference may exceed size + */ +static inline int smc_curs_diff_large(unsigned int size, + union smc_host_cursor *old, + union smc_host_cursor *new) +{ + if (old->wrap < new->wrap) + return min_t(int, + (size - old->count) + new->count + + (new->wrap - old->wrap - 1) * size, + size); + + if (old->wrap > new->wrap) /* wrap has switched from 0xffff to 0x0000 */ + return min_t(int, + (size - old->count) + new->count + + (new->wrap + 0xffff - old->wrap) * size, + size); + + return max_t(int, 0, (new->count - old->count)); +} + static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer, union smc_host_cursor *local, + union smc_host_cursor *save, struct smc_connection *conn) { - union smc_host_cursor temp; - - smc_curs_copy(&temp, local, conn); - peer->count = htonl(temp.count); - peer->wrap = htons(temp.wrap); + smc_curs_copy(save, local, conn); + peer->count = htonl(save->count); + peer->wrap = htons(save->wrap); /* peer->reserved = htons(0); must be ensured by caller */ } static inline void smc_host_msg_to_cdc(struct smc_cdc_msg *peer, - struct smc_host_cdc_msg *local, - struct smc_connection *conn) + struct smc_connection *conn, + union smc_host_cursor *save) { + struct smc_host_cdc_msg *local = &conn->local_tx_ctrl; + peer->common.type = local->common.type; peer->len = local->len; peer->seqno = htons(local->seqno); peer->token = htonl(local->token); - smc_host_cursor_to_cdc(&peer->prod, &local->prod, conn); - smc_host_cursor_to_cdc(&peer->cons, &local->cons, conn); + smc_host_cursor_to_cdc(&peer->prod, &local->prod, save, conn); + smc_host_cursor_to_cdc(&peer->cons, &local->cons, save, conn); peer->prod_flags = local->prod_flags; peer->conn_state_flags = local->conn_state_flags; } @@ -245,17 +270,18 @@ static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local, } static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local, - struct smcd_cdc_msg *peer) + struct smcd_cdc_msg *peer, + struct smc_connection *conn) { union smc_host_cursor temp; temp.wrap = peer->prod.wrap; temp.count = peer->prod.count; - atomic64_set(&local->prod.acurs, atomic64_read(&temp.acurs)); + smc_curs_copy(&local->prod, &temp, conn); temp.wrap = peer->cons.wrap; temp.count = peer->cons.count; - atomic64_set(&local->cons.acurs, atomic64_read(&temp.acurs)); + smc_curs_copy(&local->cons, &temp, conn); local->prod_flags = peer->cons.prod_flags; local->conn_state_flags = peer->cons.conn_state_flags; } @@ -265,15 +291,21 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, struct smc_connection *conn) { if (conn->lgr->is_smcd) - smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer); + smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer, conn); else smcr_cdc_msg_to_host(local, peer, conn); } -struct smc_cdc_tx_pend; +struct smc_cdc_tx_pend { + struct smc_connection *conn; /* socket connection */ + union smc_host_cursor cursor; /* tx sndbuf cursor sent */ + union smc_host_cursor p_cursor; /* rx RMBE cursor produced */ + u16 ctrl_seq; /* conn. tx sequence # */ +}; int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 776e9dfc915d..d53fd588d1f5 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -378,7 +378,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) vec.iov_len = sizeof(struct smc_clc_msg_decline); len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(struct smc_clc_msg_decline)); - if (len < sizeof(struct smc_clc_msg_decline)) + if (len < 0 || len < sizeof(struct smc_clc_msg_decline)) len = -EPROTO; return len > 0 ? 0 : len; } diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index ea2b87f29469..2ad37e998509 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -345,14 +345,7 @@ static void smc_close_passive_work(struct work_struct *work) switch (sk->sk_state) { case SMC_INIT: - if (atomic_read(&conn->bytes_to_rcv) || - (rxflags->peer_done_writing && - !smc_cdc_rxed_any_close(conn))) { - sk->sk_state = SMC_APPCLOSEWAIT1; - } else { - sk->sk_state = SMC_CLOSED; - sock_put(sk); /* passive closing */ - } + sk->sk_state = SMC_APPCLOSEWAIT1; break; case SMC_ACTIVE: sk->sk_state = SMC_APPCLOSEWAIT1; @@ -405,8 +398,13 @@ wakeup: if (old_state != sk->sk_state) { sk->sk_state_change(sk); if ((sk->sk_state == SMC_CLOSED) && - (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) + (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { smc_conn_free(conn); + if (smc->clcsock) { + sock_release(smc->clcsock); + smc->clcsock = NULL; + } + } } release_sock(sk); sock_put(sk); /* sock_hold done by schedulers of close_work */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 35c1cdc93e1c..53a17cfa61af 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -118,7 +118,6 @@ static void __smc_lgr_unregister_conn(struct smc_connection *conn) rb_erase(&conn->alert_node, &lgr->conns_all); lgr->conns_num--; conn->alert_token_local = 0; - conn->lgr = NULL; sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ } @@ -128,6 +127,8 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!lgr) + return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { __smc_lgr_unregister_conn(conn); @@ -159,8 +160,6 @@ static void smc_lgr_free_work(struct work_struct *work) bool conns; spin_lock_bh(&smc_lgr_list.lock); - if (list_empty(&lgr->list)) - goto free; read_lock_bh(&lgr->conns_lock); conns = RB_EMPTY_ROOT(&lgr->conns_all); read_unlock_bh(&lgr->conns_lock); @@ -168,8 +167,8 @@ static void smc_lgr_free_work(struct work_struct *work) spin_unlock_bh(&smc_lgr_list.lock); return; } - list_del_init(&lgr->list); /* remove from smc_lgr_list */ -free: + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); /* remove from smc_lgr_list */ spin_unlock_bh(&smc_lgr_list.lock); if (!lgr->is_smcd && !lgr->terminating) { @@ -300,13 +299,13 @@ static void smc_buf_unuse(struct smc_connection *conn, conn->sndbuf_desc->used = 0; if (conn->rmb_desc) { if (!conn->rmb_desc->regerr) { - conn->rmb_desc->used = 0; if (!lgr->is_smcd) { /* unregister rmb with peer */ smc_llc_do_delete_rkey( &lgr->lnk[SMC_SINGLE_LINK], conn->rmb_desc); } + conn->rmb_desc->used = 0; } else { /* buf registration failed, reuse not possible */ write_lock_bh(&lgr->rmbs_lock); @@ -331,8 +330,9 @@ void smc_conn_free(struct smc_connection *conn) } else { smc_cdc_tx_dismiss_slots(conn); } - smc_lgr_unregister_conn(conn); /* unsets conn->lgr */ + smc_lgr_unregister_conn(conn); smc_buf_unuse(conn, lgr); /* allow buffer reuse */ + conn->lgr = NULL; if (!lgr->conns_num) smc_lgr_schedule_free_work(lgr); @@ -462,6 +462,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) sock_hold(&smc->sk); /* sock_put in close work */ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; __smc_lgr_unregister_conn(conn); + conn->lgr = NULL; write_unlock_bh(&lgr->conns_lock); if (!schedule_work(&conn->close_work)) sock_put(&smc->sk); @@ -628,6 +629,8 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, local_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; smc_lgr_register_conn(conn); /* add smc conn to lgr */ + if (delayed_work_pending(&lgr->free_work)) + cancel_delayed_work(&lgr->free_work); write_unlock_bh(&lgr->conns_lock); break; } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index b00287989a3d..8806d2afa6ed 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -52,6 +52,24 @@ enum smc_wr_reg_state { FAILED /* ib_wr_reg_mr response: failure */ }; +struct smc_rdma_sge { /* sges for RDMA writes */ + struct ib_sge wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE]; +}; + +#define SMC_MAX_RDMA_WRITES 2 /* max. # of RDMA writes per + * message send + */ + +struct smc_rdma_sges { /* sges per message send */ + struct smc_rdma_sge tx_rdma_sge[SMC_MAX_RDMA_WRITES]; +}; + +struct smc_rdma_wr { /* work requests per message + * send + */ + struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES]; +}; + struct smc_link { struct smc_ib_device *smcibdev; /* ib-device */ u8 ibport; /* port - values 1 | 2 */ @@ -64,6 +82,8 @@ struct smc_link { struct smc_wr_buf *wr_tx_bufs; /* WR send payload buffers */ struct ib_send_wr *wr_tx_ibs; /* WR send meta data */ struct ib_sge *wr_tx_sges; /* WR send gather meta data */ + struct smc_rdma_sges *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/ + struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */ struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */ /* above four vectors have wr_tx_cnt elements and use the same index */ dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */ diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index dbf64a93d68a..371b4cf31fcd 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -38,6 +38,7 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); + r->diag_family = sk->sk_family; if (!smc->clcsock) return; r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); @@ -45,14 +46,12 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; sock_diag_save_cookie(sk, r->id.idiag_cookie); if (sk->sk_protocol == SMCPROTO_SMC) { - r->diag_family = PF_INET; memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; #if IS_ENABLED(CONFIG_IPV6) } else if (sk->sk_protocol == SMCPROTO_SMC6) { - r->diag_family = PF_INET6; memcpy(&r->id.idiag_src, &smc->clcsock->sk->sk_v6_rcv_saddr, sizeof(smc->clcsock->sk->sk_v6_rcv_saddr)); memcpy(&r->id.idiag_dst, &smc->clcsock->sk->sk_v6_daddr, diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e519ef29c0ff..53f429c04843 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -257,12 +257,20 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, smcibdev = container_of(handler, struct smc_ib_device, event_handler); switch (ibevent->event) { - case IB_EVENT_PORT_ERR: case IB_EVENT_DEVICE_FATAL: + /* terminate all ports on device */ + for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + break; + case IB_EVENT_PORT_ERR: case IB_EVENT_PORT_ACTIVE: + case IB_EVENT_GID_CHANGE: port_idx = ibevent->element.port_num - 1; - set_bit(port_idx, &smcibdev->port_event_mask); - schedule_work(&smcibdev->port_event_work); + if (port_idx < SMC_MAX_PORTS) { + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + } break; default: break; @@ -289,18 +297,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) { - struct smc_ib_device *smcibdev = - (struct smc_ib_device *)ibevent->device; + struct smc_link *lnk = (struct smc_link *)priv; + struct smc_ib_device *smcibdev = lnk->smcibdev; u8 port_idx; switch (ibevent->event) { - case IB_EVENT_DEVICE_FATAL: - case IB_EVENT_GID_CHANGE: - case IB_EVENT_PORT_ERR: + case IB_EVENT_QP_FATAL: case IB_EVENT_QP_ACCESS_ERR: - port_idx = ibevent->element.port_num - 1; - set_bit(port_idx, &smcibdev->port_event_mask); - schedule_work(&smcibdev->port_event_work); + port_idx = ibevent->element.qp->port - 1; + if (port_idx < SMC_MAX_PORTS) { + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + } break; default: break; @@ -556,7 +564,6 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) spin_lock(&smc_ib_devices.lock); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ spin_unlock(&smc_ib_devices.lock); - smc_pnet_remove_by_ibdev(smcibdev); smc_ib_cleanup_per_ibdev(smcibdev); ib_unregister_event_handler(&smcibdev->event_handler); kfree(smcibdev); diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index bac7fd65a4c0..da60ab9e8d70 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -42,6 +42,8 @@ struct smc_ib_device { /* ib-device infos for smc */ /* mac address per port*/ u8 pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN]; /* pnetid per port */ + bool pnetid_by_user[SMC_MAX_PORTS]; + /* pnetid defined by user? */ u8 initialized : 1; /* ib dev CQ, evthdl done */ struct work_struct port_event_work; unsigned long port_event_mask; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index a6d3623d06f4..4fd60c522802 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -166,7 +166,8 @@ static int smc_llc_add_pending_send(struct smc_link *link, { int rc; - rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend); + rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL, + pend); if (rc < 0) return rc; BUILD_BUG_ON_MSG( diff --git a/net/smc/smc_netns.h b/net/smc/smc_netns.h new file mode 100644 index 000000000000..e7a8fc4ae02f --- /dev/null +++ b/net/smc/smc_netns.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Shared Memory Communications + * + * Network namespace definitions. + * + * Copyright IBM Corp. 2018 + */ + +#ifndef SMC_NETNS_H +#define SMC_NETNS_H + +#include "smc_pnet.h" + +extern unsigned int smc_net_id; + +/* per-network namespace private data */ +struct smc_net { + struct smc_pnettable pnettable; +}; +#endif diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 7cb3e4f07c10..8d2f6296279c 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -20,14 +20,21 @@ #include <rdma/ib_verbs.h> +#include <net/netns/generic.h> +#include "smc_netns.h" + #include "smc_pnet.h" #include "smc_ib.h" #include "smc_ism.h" +#define SMC_ASCII_BLANK 32 + |