From 226f7a7d97e37220a442f52eb85ebff2cd6fc0d2 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 30 Jun 2016 16:11:10 -0700 Subject: RDS: Rework path specific indirections Refactor code to avoid separate indirections for single-path and multipath transports. All transports (both single and mp-capable) will get a pointer to the rds_conn_path, and can trivially derive the rds_connection from the ->cp_conn. Acked-by: Santosh Shilimkar Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/connection.c | 5 +---- net/rds/ib.c | 4 ++-- net/rds/ib.h | 4 ++-- net/rds/ib_cm.c | 3 ++- net/rds/ib_send.c | 3 ++- net/rds/loop.c | 4 ++-- net/rds/rds.h | 3 --- net/rds/send.c | 16 ++++------------ net/rds/tcp.c | 6 +++--- net/rds/tcp.h | 6 +++--- net/rds/tcp_connect.c | 7 ++++--- net/rds/tcp_send.c | 8 ++++---- 12 files changed, 29 insertions(+), 40 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index a4b07c899d89..17c2f2591ac4 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -326,10 +326,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp) wait_event(cp->cp_waitq, !test_bit(RDS_RECV_REFILL, &cp->cp_flags)); - if (!conn->c_trans->t_mp_capable) - conn->c_trans->conn_shutdown(conn); - else - conn->c_trans->conn_path_shutdown(cp); + conn->c_trans->conn_path_shutdown(cp); rds_conn_path_reset(cp); if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING, diff --git a/net/rds/ib.c b/net/rds/ib.c index 44946a681a8c..1b29ec9445fc 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -381,7 +381,7 @@ void rds_ib_exit(void) struct rds_transport rds_ib_transport = { .laddr_check = rds_ib_laddr_check, - .xmit_complete = rds_ib_xmit_complete, + .xmit_path_complete = rds_ib_xmit_path_complete, .xmit = rds_ib_xmit, .xmit_rdma = rds_ib_xmit_rdma, .xmit_atomic = rds_ib_xmit_atomic, @@ -389,7 +389,7 @@ struct rds_transport rds_ib_transport = { .conn_alloc = rds_ib_conn_alloc, .conn_free = rds_ib_conn_free, .conn_connect = rds_ib_conn_connect, - .conn_shutdown = rds_ib_conn_shutdown, + .conn_path_shutdown = rds_ib_conn_path_shutdown, .inc_copy_to_user = rds_ib_inc_copy_to_user, .inc_free = rds_ib_inc_free, .cm_initiate_connect = rds_ib_cm_initiate_connect, diff --git a/net/rds/ib.h b/net/rds/ib.h index 627fb79aee65..2051f4bd7a66 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -329,7 +329,7 @@ extern struct list_head ib_nodev_conns; int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp); void rds_ib_conn_free(void *arg); int rds_ib_conn_connect(struct rds_connection *conn); -void rds_ib_conn_shutdown(struct rds_connection *conn); +void rds_ib_conn_path_shutdown(struct rds_conn_path *cp); void rds_ib_state_change(struct sock *sk); int rds_ib_listen_init(void); void rds_ib_listen_stop(void); @@ -384,7 +384,7 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest); extern wait_queue_head_t rds_ib_ring_empty_wait; /* ib_send.c */ -void rds_ib_xmit_complete(struct rds_connection *conn); +void rds_ib_xmit_path_complete(struct rds_conn_path *cp); int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index e48bb1ba3dfc..e34ea0b5c16a 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -731,8 +731,9 @@ out: * so that it can be called at any point during startup. In fact it * can be called multiple times for a given connection. */ -void rds_ib_conn_shutdown(struct rds_connection *conn) +void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) { + struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; int err = 0; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 6e4110aa5135..84d90c97332f 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -980,8 +980,9 @@ out: return ret; } -void rds_ib_xmit_complete(struct rds_connection *conn) +void rds_ib_xmit_path_complete(struct rds_conn_path *cp) { + struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; /* We may have a pending ACK or window update we were unable diff --git a/net/rds/loop.c b/net/rds/loop.c index 15f83db78f0c..318c21d7d8d4 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -156,7 +156,7 @@ static int rds_loop_conn_connect(struct rds_connection *conn) return 0; } -static void rds_loop_conn_shutdown(struct rds_connection *conn) +static void rds_loop_conn_path_shutdown(struct rds_conn_path *cp) { } @@ -189,7 +189,7 @@ struct rds_transport rds_loop_transport = { .conn_alloc = rds_loop_conn_alloc, .conn_free = rds_loop_conn_free, .conn_connect = rds_loop_conn_connect, - .conn_shutdown = rds_loop_conn_shutdown, + .conn_path_shutdown = rds_loop_conn_path_shutdown, .inc_copy_to_user = rds_message_inc_copy_to_user, .inc_free = rds_loop_inc_free, .t_name = "loopback", diff --git a/net/rds/rds.h b/net/rds/rds.h index 2e35b738176f..5bbad08262f5 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -455,11 +455,8 @@ struct rds_transport { int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp); void (*conn_free)(void *data); int (*conn_connect)(struct rds_connection *conn); - void (*conn_shutdown)(struct rds_connection *conn); void (*conn_path_shutdown)(struct rds_conn_path *conn); - void (*xmit_prepare)(struct rds_connection *conn); void (*xmit_path_prepare)(struct rds_conn_path *cp); - void (*xmit_complete)(struct rds_connection *conn); void (*xmit_path_complete)(struct rds_conn_path *cp); int (*xmit)(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); diff --git a/net/rds/send.c b/net/rds/send.c index ee43d6b2ea8f..5a9caf1da896 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -183,12 +183,8 @@ restart: goto out; } - if (conn->c_trans->t_mp_capable) { - if (conn->c_trans->xmit_path_prepare) - conn->c_trans->xmit_path_prepare(cp); - } else if (conn->c_trans->xmit_prepare) { - conn->c_trans->xmit_prepare(conn); - } + if (conn->c_trans->xmit_path_prepare) + conn->c_trans->xmit_path_prepare(cp); /* * spin trying to push headers and data down the connection until @@ -403,12 +399,8 @@ restart: } over_batch: - if (conn->c_trans->t_mp_capable) { - if (conn->c_trans->xmit_path_complete) - conn->c_trans->xmit_path_complete(cp); - } else if (conn->c_trans->xmit_complete) { - conn->c_trans->xmit_complete(conn); - } + if (conn->c_trans->xmit_path_complete) + conn->c_trans->xmit_path_complete(cp); release_in_xmit(cp); /* Nuke any messages we decided not to retransmit. */ diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 5217d49ce6d6..b139630daaa4 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -340,14 +340,14 @@ static void rds_tcp_exit(void); struct rds_transport rds_tcp_transport = { .laddr_check = rds_tcp_laddr_check, - .xmit_prepare = rds_tcp_xmit_prepare, - .xmit_complete = rds_tcp_xmit_complete, + .xmit_path_prepare = rds_tcp_xmit_path_prepare, + .xmit_path_complete = rds_tcp_xmit_path_complete, .xmit = rds_tcp_xmit, .recv = rds_tcp_recv, .conn_alloc = rds_tcp_conn_alloc, .conn_free = rds_tcp_conn_free, .conn_connect = rds_tcp_conn_connect, - .conn_shutdown = rds_tcp_conn_shutdown, + .conn_path_shutdown = rds_tcp_conn_path_shutdown, .inc_copy_to_user = rds_tcp_inc_copy_to_user, .inc_free = rds_tcp_inc_free, .stats_info_copy = rds_tcp_stats_info_copy, diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 7940babf6c71..728abe22c9a3 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -61,7 +61,7 @@ void rds_tcp_accept_work(struct sock *sk); /* tcp_connect.c */ int rds_tcp_conn_connect(struct rds_connection *conn); -void rds_tcp_conn_shutdown(struct rds_connection *conn); +void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn); void rds_tcp_state_change(struct sock *sk); /* tcp_listen.c */ @@ -80,8 +80,8 @@ void rds_tcp_inc_free(struct rds_incoming *inc); int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); /* tcp_send.c */ -void rds_tcp_xmit_prepare(struct rds_connection *conn); -void rds_tcp_xmit_complete(struct rds_connection *conn); +void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp); +void rds_tcp_xmit_path_complete(struct rds_conn_path *cp); int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); void rds_tcp_write_space(struct sock *sk); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 96c2c4d17909..aa65c1631c4b 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -144,12 +144,13 @@ out: * callbacks to those set by TCP. Our callbacks won't execute again once we * hold the sock lock. */ -void rds_tcp_conn_shutdown(struct rds_connection *conn) +void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct socket *sock = tc->t_sock; - rdsdebug("shutting down conn %p tc %p sock %p\n", conn, tc, sock); + rdsdebug("shutting down conn %p tc %p sock %p\n", + cp->cp_conn, tc, sock); if (sock) { sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 710f1aae97ad..52cda947457b 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -49,16 +49,16 @@ static void rds_tcp_cork(struct socket *sock, int val) set_fs(oldfs); } -void rds_tcp_xmit_prepare(struct rds_connection *conn) +void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; rds_tcp_cork(tc->t_sock, 1); } -void rds_tcp_xmit_complete(struct rds_connection *conn) +void rds_tcp_xmit_path_complete(struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; rds_tcp_cork(tc->t_sock, 0); } -- cgit v1.2.3 From 26e4e6bb683028546f339018ab4cd394300a92a4 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 30 Jun 2016 16:11:11 -0700 Subject: RDS: TCP: Remove dead logic around c_passive in rds-tcp The c_passive bit is only intended for the IB transport and will never be encountered in rds-tcp, so remove the dead logic that predicates on this bit. Acked-by: Santosh Shilimkar Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/tcp.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index b139630daaa4..c56fff28084f 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -329,11 +329,8 @@ static void rds_tcp_destroy_conns(void) INIT_LIST_HEAD(&rds_tcp_conn_list); spin_unlock_irq(&rds_tcp_conn_lock); - list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { - if (tc->conn->c_passive) - rds_conn_destroy(tc->conn->c_passive); + list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) rds_conn_destroy(tc->conn); - } } static void rds_tcp_exit(void); @@ -512,8 +509,6 @@ static void rds_tcp_kill_sock(struct net *net) sk = tc->t_sock->sk; sk->sk_prot->disconnect(sk, 0); tcp_done(sk); - if (tc->conn->c_passive) - rds_conn_destroy(tc->conn->c_passive); rds_conn_destroy(tc->conn); } } -- cgit v1.2.3 From 02105b2ccdd6344146e0296172a9e0f17ff624ef Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 30 Jun 2016 16:11:12 -0700 Subject: RDS: TCP: Make rds_tcp_connection track the rds_conn_path The struct rds_tcp_connection is the transport-specific private data structure that tracks TCP information per rds_conn_path. Modify this structure to have a back-pointer to the rds_conn_path for which it is the ->cp_transport_data. Acked-by: Santosh Shilimkar Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/connection.c | 30 +++++++++++++++--------------- net/rds/tcp.c | 44 +++++++++++++++++++++++++------------------- net/rds/tcp.h | 6 +++--- net/rds/tcp_connect.c | 6 +++--- net/rds/tcp_listen.c | 4 ++-- 5 files changed, 48 insertions(+), 42 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 17c2f2591ac4..1b0c2a783b5e 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -253,9 +253,12 @@ static struct rds_connection *__rds_conn_create(struct net *net, for (i = 0; i < RDS_MPATH_WORKERS; i++) { cp = &conn->c_path[i]; - trans->conn_free(cp->cp_transport_data); - if (!trans->t_mp_capable) - break; + /* The ->conn_alloc invocation may have + * allocated resource for all paths, so all + * of them may have to be freed here. + */ + if (cp->cp_transport_data) + trans->conn_free(cp->cp_transport_data); } kmem_cache_free(rds_conn_slab, conn); conn = found; @@ -367,6 +370,9 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) { struct rds_message *rm, *rtmp; + if (!cp->cp_transport_data) + return; + rds_conn_path_drop(cp); flush_work(&cp->cp_down_w); @@ -398,6 +404,8 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) void rds_conn_destroy(struct rds_connection *conn) { unsigned long flags; + int i; + struct rds_conn_path *cp; rdsdebug("freeing conn %p for %pI4 -> " "%pI4\n", conn, &conn->c_laddr, @@ -410,18 +418,10 @@ void rds_conn_destroy(struct rds_connection *conn) synchronize_rcu(); /* shut the connection down */ - if (!conn->c_trans->t_mp_capable) { - rds_conn_path_destroy(&conn->c_path[0]); - BUG_ON(!list_empty(&conn->c_path[0].cp_retrans)); - } else { - int i; - struct rds_conn_path *cp; - - for (i = 0; i < RDS_MPATH_WORKERS; i++) { - cp = &conn->c_path[i]; - rds_conn_path_destroy(cp); - BUG_ON(!list_empty(&cp->cp_retrans)); - } + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + cp = &conn->c_path[i]; + rds_conn_path_destroy(cp); + BUG_ON(!list_empty(&cp->cp_retrans)); } /* diff --git a/net/rds/tcp.c b/net/rds/tcp.c index c56fff28084f..c6b47f670990 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -221,7 +221,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) sock->sk->sk_data_ready = sock->sk->sk_user_data; tc->t_sock = sock; - tc->conn = conn; + tc->t_cpath = &conn->c_path[0]; tc->t_orig_data_ready = sock->sk->sk_data_ready; tc->t_orig_write_space = sock->sk->sk_write_space; tc->t_orig_state_change = sock->sk->sk_state_change; @@ -284,24 +284,29 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr) static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) { struct rds_tcp_connection *tc; + int i; - tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); - if (!tc) - return -ENOMEM; + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); + if (!tc) + return -ENOMEM; - mutex_init(&tc->t_conn_lock); - tc->t_sock = NULL; - tc->t_tinc = NULL; - tc->t_tinc_hdr_rem = sizeof(struct rds_header); - tc->t_tinc_data_rem = 0; + mutex_init(&tc->t_conn_path_lock); + tc->t_sock = NULL; + tc->t_tinc = NULL; + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; - conn->c_transport_data = tc; + conn->c_path[i].cp_transport_data = tc; + tc->t_cpath = &conn->c_path[i]; - spin_lock_irq(&rds_tcp_conn_lock); - list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); - spin_unlock_irq(&rds_tcp_conn_lock); + spin_lock_irq(&rds_tcp_conn_lock); + list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); + spin_unlock_irq(&rds_tcp_conn_lock); + rdsdebug("rds_conn_path [%d] tc %p\n", i, + conn->c_path[i].cp_transport_data); + } - rdsdebug("alloced tc %p\n", conn->c_transport_data); return 0; } @@ -330,7 +335,7 @@ static void rds_tcp_destroy_conns(void) spin_unlock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) - rds_conn_destroy(tc->conn); + rds_conn_destroy(tc->t_cpath->cp_conn); } static void rds_tcp_exit(void); @@ -498,7 +503,7 @@ static void rds_tcp_kill_sock(struct net *net) flush_work(&rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->conn->c_net); + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; @@ -509,7 +514,7 @@ static void rds_tcp_kill_sock(struct net *net) sk = tc->t_sock->sk; sk->sk_prot->disconnect(sk, 0); tcp_done(sk); - rds_conn_destroy(tc->conn); + rds_conn_destroy(tc->t_cpath->cp_conn); } } @@ -547,12 +552,13 @@ static void rds_tcp_sysctl_reset(struct net *net) spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->conn->c_net); + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; - rds_conn_drop(tc->conn); /* reconnect with new parameters */ + /* reconnect with new parameters */ + rds_conn_path_drop(tc->t_cpath); } spin_unlock_irq(&rds_tcp_conn_lock); } diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 728abe22c9a3..e1ff16908c5e 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -11,11 +11,11 @@ struct rds_tcp_incoming { struct rds_tcp_connection { struct list_head t_tcp_node; - struct rds_connection *conn; - /* t_conn_lock synchronizes the connection establishment between + struct rds_conn_path *t_cpath; + /* t_conn_path_lock synchronizes the connection establishment between * rds_tcp_accept_one and rds_tcp_conn_connect */ - struct mutex t_conn_lock; + struct mutex t_conn_path_lock; struct socket *t_sock; void *t_orig_write_space; void *t_orig_data_ready; diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index aa65c1631c4b..146692c8afac 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -82,10 +82,10 @@ int rds_tcp_conn_connect(struct rds_connection *conn) int ret; struct rds_tcp_connection *tc = conn->c_transport_data; - mutex_lock(&tc->t_conn_lock); + mutex_lock(&tc->t_conn_path_lock); if (rds_conn_up(conn)) { - mutex_unlock(&tc->t_conn_lock); + mutex_unlock(&tc->t_conn_path_lock); return 0; } ret = sock_create_kern(rds_conn_net(conn), PF_INET, @@ -129,7 +129,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn) } out: - mutex_unlock(&tc->t_conn_lock); + mutex_unlock(&tc->t_conn_path_lock); if (sock) sock_release(sock); return ret; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index f9cc945a77b3..d8933469ab13 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -121,7 +121,7 @@ int rds_tcp_accept_one(struct socket *sock) */ rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); - mutex_lock(&rs_tcp->t_conn_lock); + mutex_lock(&rs_tcp->t_conn_path_lock); conn_state = rds_conn_state(conn); if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP) goto rst_nsk; @@ -156,7 +156,7 @@ rst_nsk: ret = 0; out: if (rs_tcp) - mutex_unlock(&rs_tcp->t_conn_lock); + mutex_unlock(&rs_tcp->t_conn_path_lock); if (new_sock) sock_release(new_sock); return ret; -- cgit v1.2.3 From afb4164d91c7486a1d4ab098a1b88e27b5e25772 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 30 Jun 2016 16:11:13 -0700 Subject: RDS: TCP: Refactor connection destruction to handle multiple paths A single rds_connection may have multiple rds_conn_paths that have to be carefully and correctly destroyed, for both rmmod and netns-delete cases. For both cases, we extract a single rds_tcp_connection for each conn into a temporary list, and then invoke rds_conn_destroy() which iteratively dismantles every path in the rds_connection. For the netns deletion case, we additionally have to make sure that we do not leave a socket in TIME_WAIT state, as this will hold up the netns deletion. Thus we call rds_tcp_conn_paths_destroy() to reset state quickly. Acked-by: Santosh Shilimkar Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/tcp.c | 46 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index c6b47f670990..b32772759c9d 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -323,6 +323,17 @@ static void rds_tcp_conn_free(void *arg) kmem_cache_free(rds_tcp_conn_slab, tc); } +static bool list_has_conn(struct list_head *list, struct rds_connection *conn) +{ + struct rds_tcp_connection *tc, *_tc; + + list_for_each_entry_safe(tc, _tc, list, t_tcp_node) { + if (tc->t_cpath->cp_conn == conn) + return true; + } + return false; +} + static void rds_tcp_destroy_conns(void) { struct rds_tcp_connection *tc, *_tc; @@ -330,8 +341,10 @@ static void rds_tcp_destroy_conns(void) /* avoid calling conn_destroy with irqs off */ spin_lock_irq(&rds_tcp_conn_lock); - list_splice(&rds_tcp_conn_list, &tmp_list); - INIT_LIST_HEAD(&rds_tcp_conn_list); + list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + list_move_tail(&tc->t_tcp_node, &tmp_list); + } spin_unlock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) @@ -491,10 +504,30 @@ static struct pernet_operations rds_tcp_net_ops = { .size = sizeof(struct rds_tcp_net), }; +/* explicitly send a RST on each socket, thereby releasing any socket refcnts + * that may otherwise hold up netns deletion. + */ +static void rds_tcp_conn_paths_destroy(struct rds_connection *conn) +{ + struct rds_conn_path *cp; + struct rds_tcp_connection *tc; + int i; + struct sock *sk; + + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + cp = &conn->c_path[i]; + tc = cp->cp_transport_data; + if (!tc->t_sock) + continue; + sk = tc->t_sock->sk; + sk->sk_prot->disconnect(sk, 0); + tcp_done(sk); + } +} + static void rds_tcp_kill_sock(struct net *net) { struct rds_tcp_connection *tc, *_tc; - struct sock *sk; LIST_HEAD(tmp_list); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); @@ -507,13 +540,12 @@ static void rds_tcp_kill_sock(struct net *net) if (net != c_net || !tc->t_sock) continue; - list_move_tail(&tc->t_tcp_node, &tmp_list); + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + list_move_tail(&tc->t_tcp_node, &tmp_list); } spin_unlock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { - sk = tc->t_sock->sk; - sk->sk_prot->disconnect(sk, 0); - tcp_done(sk); + rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn); rds_conn_destroy(tc->t_cpath->cp_conn); } } -- cgit v1.2.3 From ea3b1ea53930879c9847044f5cb9c97411cae797 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 30 Jun 2016 16:11:14 -0700 Subject: RDS: TCP: make ->sk_user_data point to a rds_conn_path The socket callbacks should all operate on a struct rds_conn_path, in preparation for a MP capable RDS-TCP. Acked-by: Santosh Shilimkar Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/tcp.c | 25 +++++++++++++------------ net/rds/tcp.h | 4 ++-- net/rds/tcp_connect.c | 16 ++++++++-------- net/rds/tcp_listen.c | 12 ++++++------ net/rds/tcp_recv.c | 12 ++++++------ net/rds/tcp_send.c | 12 ++++++------ 6 files changed, 41 insertions(+), 40 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index b32772759c9d..5658f3e9f601 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -136,9 +136,9 @@ void rds_tcp_restore_callbacks(struct socket *sock, * from being called while it isn't set. */ void rds_tcp_reset_callbacks(struct socket *sock, - struct rds_connection *conn) + struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct socket *osock = tc->t_sock; if (!osock) @@ -148,8 +148,8 @@ void rds_tcp_reset_callbacks(struct socket *sock, * We have an outstanding SYN to this peer, which may * potentially have transitioned to the RDS_CONN_UP state, * so we must quiesce any send threads before resetting - * c_transport_data. We quiesce these threads by setting - * c_state to something other than RDS_CONN_UP, and then + * cp_transport_data. We quiesce these threads by setting + * cp_state to something other than RDS_CONN_UP, and then * waiting for any existing threads in rds_send_xmit to * complete release_in_xmit(). (Subsequent threads entering * rds_send_xmit() will bail on !rds_conn_up(). @@ -164,8 +164,8 @@ void rds_tcp_reset_callbacks(struct socket *sock, * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change * cannot mark rds_conn_path_up() in the window before lock_sock() */ - atomic_set(&conn->c_state, RDS_CONN_RESETTING); - wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); + atomic_set(&cp->cp_state, RDS_CONN_RESETTING); + wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags)); lock_sock(osock->sk); /* reset receive side state for rds_tcp_data_recv() for osock */ if (tc->t_tinc) { @@ -186,11 +186,12 @@ void rds_tcp_reset_callbacks(struct socket *sock, release_sock(osock->sk); sock_release(osock); newsock: - rds_send_path_reset(&conn->c_path[0]); + rds_send_path_reset(cp); lock_sock(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); tc->t_sock = sock; - sock->sk->sk_user_data = conn; + tc->t_cpath = cp; + sock->sk->sk_user_data = cp; sock->sk->sk_data_ready = rds_tcp_data_ready; sock->sk->sk_write_space = rds_tcp_write_space; sock->sk->sk_state_change = rds_tcp_state_change; @@ -203,9 +204,9 @@ newsock: * above rds_tcp_reset_callbacks for notes about synchronization * with data path */ -void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) +void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc); write_lock_bh(&sock->sk->sk_callback_lock); @@ -221,12 +222,12 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) sock->sk->sk_data_ready = sock->sk->sk_user_data; tc->t_sock = sock; - tc->t_cpath = &conn->c_path[0]; + tc->t_cpath = cp; tc->t_orig_data_ready = sock->sk->sk_data_ready; tc->t_orig_write_space = sock->sk->sk_write_space; tc->t_orig_state_change = sock->sk->sk_state_change; - sock->sk->sk_user_data = conn; + sock->sk->sk_user_data = cp; sock->sk->sk_data_ready = rds_tcp_data_ready; sock->sk->sk_write_space = rds_tcp_write_space; sock->sk->sk_state_change = rds_tcp_state_change; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index e1ff16908c5e..151b09d1af43 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -49,8 +49,8 @@ struct rds_tcp_statistics { /* tcp.c */ void rds_tcp_tune(struct socket *sock); void rds_tcp_nonagle(struct socket *sock); -void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); -void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn); +void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); +void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc); u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 146692c8afac..7eddce50e7a3 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -41,16 +41,16 @@ void rds_tcp_state_change(struct sock *sk) { void (*state_change)(struct sock *sk); - struct rds_connection *conn; + struct rds_conn_path *cp; struct rds_tcp_connection *tc; read_lock_bh(&sk->sk_callback_lock); - conn = sk->sk_user_data; - if (!conn) { + cp = sk->sk_user_data; + if (!cp) { state_change = sk->sk_state_change; goto out; } - tc = conn->c_transport_data; + tc = cp->cp_transport_data; state_change = tc->t_orig_state_change; rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state); @@ -61,12 +61,11 @@ void rds_tcp_state_change(struct sock *sk) case TCP_SYN_RECV: break; case TCP_ESTABLISHED: - rds_connect_path_complete(&conn->c_path[0], - RDS_CONN_CONNECTING); + rds_connect_path_complete(cp, RDS_CONN_CONNECTING); break; case TCP_CLOSE_WAIT: case TCP_CLOSE: - rds_conn_drop(conn); + rds_conn_path_drop(cp); default: break; } @@ -81,6 +80,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn) struct sockaddr_in src, dest; int ret; struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_conn_path *cp = &conn->c_path[0]; mutex_lock(&tc->t_conn_path_lock); @@ -114,7 +114,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn) * once we call connect() we can start getting callbacks and they * own the socket */ - rds_tcp_set_callbacks(sock, conn); + rds_tcp_set_callbacks(sock, cp); ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest), O_NONBLOCK); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index d8933469ab13..ca975a217a49 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -79,6 +79,7 @@ int rds_tcp_accept_one(struct socket *sock) struct inet_sock *inet; struct rds_tcp_connection *rs_tcp = NULL; int conn_state; + struct rds_conn_path *cp; if (!sock) /* module unload or netns delete in progress */ return -ENETUNREACH; @@ -120,6 +121,7 @@ int rds_tcp_accept_one(struct socket *sock) * rds_tcp_state_change() will do that cleanup */ rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; + cp = &conn->c_path[0]; rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); mutex_lock(&rs_tcp->t_conn_path_lock); conn_state = rds_conn_state(conn); @@ -136,16 +138,14 @@ int rds_tcp_accept_one(struct socket *sock) !conn->c_path[0].cp_outgoing) { goto rst_nsk; } else { - rds_tcp_reset_callbacks(new_sock, conn); + rds_tcp_reset_callbacks(new_sock, cp); conn->c_path[0].cp_outgoing = 0; /* rds_connect_path_complete() marks RDS_CONN_UP */ - rds_connect_path_complete(&conn->c_path[0], - RDS_CONN_RESETTING); + rds_connect_path_complete(cp, RDS_CONN_RESETTING); } } else { - rds_tcp_set_callbacks(new_sock, conn); - rds_connect_path_complete(&conn->c_path[0], - RDS_CONN_CONNECTING); + rds_tcp_set_callbacks(new_sock, cp); + rds_connect_path_complete(cp, RDS_CONN_CONNECTING); } new_sock = NULL; ret = 0; diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 4a87d9ef3084..aa7a79a00ef7 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -297,24 +297,24 @@ int rds_tcp_recv(struct rds_connection *conn) void rds_tcp_data_ready(struct sock *sk) { void (*ready)(struct sock *sk); - struct rds_connection *conn; + struct rds_conn_path *cp; struct rds_tcp_connection *tc; rdsdebug("data ready sk %p\n", sk); read_lock_bh(&sk->sk_callback_lock); - conn = sk->sk_user_data; - if (!conn) { /* check for teardown race */ + cp = sk->sk_user_data; + if (!cp) { /* check for teardown race */ ready = sk->sk_data_ready; goto out; } - tc = conn->c_transport_data; + tc = cp->cp_transport_data; ready = tc->t_orig_data_ready; rds_tcp_stats_inc(s_tcp_data_ready_calls); - if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM) - queue_delayed_work(rds_wq, &conn->c_recv_w, 0); + if (rds_tcp_read_sock(cp->cp_conn, GFP_ATOMIC) == -ENOMEM) + queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); out: read_unlock_bh(&sk->sk_callback_lock); ready(sk); diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 52cda947457b..57e0f5826406 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -178,27 +178,27 @@ static int rds_tcp_is_acked(struct rds_message *rm, uint64_t ack) void rds_tcp_write_space(struct sock *sk) { void (*write_space)(struct sock *sk); - struct rds_connection *conn; + struct rds_conn_path *cp; struct rds_tcp_connection *tc; read_lock_bh(&sk->sk_callback_lock); - conn = sk->sk_user_data; - if (!conn) { + cp = sk->sk_user_data; + if (!cp) { write_space = sk->sk_write_space; goto out; } - tc = conn->c_transport_data; + tc = cp->cp_transport_data; rdsdebug("write_space for tc %p\n", tc); write_space = tc->t_orig_write_space; rds_tcp_stats_inc(s_tcp_write_space_calls); rdsdebug("tcp una %u\n", rds_tcp_snd_una(tc)); tc->t_last_seen_una = rds_tcp_snd_una(tc); - rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); + rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked); if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) - queue_delayed_work(rds_wq, &conn->c_send_w, 0); + queue_delayed_work(rds_wq, &cp->cp_send_w, 0); out: read_unlock_bh(&sk->sk_callback_lock); -- cgit v1.2.3 From 2da43c4a1b517d02e71d9611a2242273e7d399ba Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 30 Jun 2016 16:11:15 -0700 Subject: RDS: TCP: make receive path use the rds_conn_path The ->sk_user_data contains a pointer to the rds_conn_path for the socket. Use this consistently in the rds_tcp_data_ready callbacks to get the rds_conn_path for rds_recv_incoming. Acked-by: Santosh Shilimkar Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/ib.c | 2 +- net/rds/ib.h | 2 +- net/rds/ib_recv.c | 3 ++- net/rds/loop.c | 4 ++-- net/rds/rds.h | 2 +- net/rds/tcp.c | 2 +- net/rds/tcp.h | 2 +- net/rds/tcp_recv.c | 29 ++++++++++++++++------------- net/rds/threads.c | 2 +- 9 files changed, 26 insertions(+), 22 deletions(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index 1b29ec9445fc..e6ba85671004 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -385,7 +385,7 @@ struct rds_transport rds_ib_transport = { .xmit = rds_ib_xmit, .xmit_rdma = rds_ib_xmit_rdma, .xmit_atomic = rds_ib_xmit_atomic, - .recv = rds_ib_recv, + .recv_path = rds_ib_recv_path, .conn_alloc = rds_ib_conn_alloc, .conn_free = rds_ib_conn_free, .conn_connect = rds_ib_conn_connect, diff --git a/net/rds/ib.h b/net/rds/ib.h index 2051f4bd7a66..579de7e6369c 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -354,7 +354,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc); /* ib_recv.c */ int rds_ib_recv_init(void); void rds_ib_recv_exit(void); -int rds_ib_recv(struct rds_connection *conn); +int rds_ib_recv_path(struct rds_conn_path *conn); int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic); void rds_ib_recv_free_caches(struct rds_ib_connection *ic); void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp); diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 4ea8cb17cc7a..606a11f681d2 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -1009,8 +1009,9 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, rds_ib_recv_refill(conn, 0, GFP_NOWAIT); } -int rds_ib_recv(struct rds_connection *conn) +int rds_ib_recv_path(struct rds_conn_path *cp) { + struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; int ret = 0; diff --git a/net/rds/loop.c b/net/rds/loop.c index 318c21d7d8d4..20284a4dca91 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -102,7 +102,7 @@ static void rds_loop_inc_free(struct rds_incoming *inc) } /* we need to at least give the thread something to succeed */ -static int rds_loop_recv(struct rds_connection *conn) +static int rds_loop_recv_path(struct rds_conn_path *cp) { return 0; } @@ -185,7 +185,7 @@ void rds_loop_exit(void) */ struct rds_transport rds_loop_transport = { .xmit = rds_loop_xmit, - .recv = rds_loop_recv, + .recv_path = rds_loop_recv_path, .conn_alloc = rds_loop_conn_alloc, .conn_free = rds_loop_conn_free, .conn_connect = rds_loop_conn_connect, diff --git a/net/rds/rds.h b/net/rds/rds.h index 5bbad08262f5..0faca3011370 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -462,7 +462,7 @@ struct rds_transport { unsigned int hdr_off, unsigned int sg, unsigned int off); int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op); int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op); - int (*recv)(struct rds_connection *conn); + int (*recv_path)(struct rds_conn_path *cp); int (*inc_copy_to_user)(struct rds_incoming *inc, struct iov_iter *to); void (*inc_free)(struct rds_incoming *inc); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 5658f3e9f601..7bc136c66dbe 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -359,7 +359,7 @@ struct rds_transport rds_tcp_transport = { .xmit_path_prepare = rds_tcp_xmit_path_prepare, .xmit_path_complete = rds_tcp_xmit_path_complete, .xmit = rds_tcp_xmit, - .recv = rds_tcp_recv, + .recv_path = rds_tcp_recv_path, .conn_alloc = rds_tcp_conn_alloc, .conn_free = rds_tcp_conn_free, .conn_connect = rds_tcp_conn_connect, diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 151b09d1af43..5a5f91abe1de 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -75,7 +75,7 @@ int rds_tcp_keepalive(struct socket *sock); int rds_tcp_recv_init(void); void rds_tcp_recv_exit(void); void rds_tcp_data_ready(struct sock *sk); -int rds_tcp_recv(struct rds_connection *conn); +int rds_tcp_recv_path(struct rds_conn_path *cp); void rds_tcp_inc_free(struct rds_incoming *inc); int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index aa7a79a00ef7..ad4892e97f91 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -34,7 +34,6 @@ #include #include -#include "rds_single_path.h" #include "rds.h" #include "tcp.h" @@ -148,7 +147,7 @@ static void rds_tcp_cong_recv(struct rds_connection *conn, } struct rds_tcp_desc_arg { - struct rds_connection *conn; + struct rds_conn_path *conn_path; gfp_t gfp; }; @@ -156,8 +155,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, unsigned int offset, size_t len) { struct rds_tcp_desc_arg *arg = desc->arg.data; - struct rds_connection *conn = arg->conn; - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_conn_path *cp = arg->conn_path; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct rds_tcp_incoming *tinc = tc->t_tinc; struct sk_buff *clone; size_t left = len, to_copy; @@ -179,7 +178,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } tc->t_tinc = tinc; rdsdebug("alloced tinc %p\n", tinc); - rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr); + rds_inc_path_init(&tinc->ti_inc, cp, + cp->cp_conn->c_faddr); /* * XXX * we might be able to use the __ variants when * we've already serialized at a higher level. @@ -229,6 +229,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } if (tc->t_tinc_hdr_rem == 0 && tc->t_tinc_data_rem == 0) { + struct rds_connection *conn = cp->cp_conn; + if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) rds_tcp_cong_recv(conn, tinc); else @@ -251,15 +253,15 @@ out: } /* the caller has to hold the sock lock */ -static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp) +static int rds_tcp_read_sock(struct rds_conn_path *cp, gfp_t gfp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct socket *sock = tc->t_sock; read_descriptor_t desc; struct rds_tcp_desc_arg arg; /* It's like glib in the kernel! */ - arg.conn = conn; + arg.conn_path = cp; arg.gfp = gfp; desc.arg.data = &arg; desc.error = 0; @@ -279,16 +281,17 @@ static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp) * if we fail to allocate we're in trouble.. blindly wait some time before * trying again to see if the VM can free up something for us. */ -int rds_tcp_recv(struct rds_connection *conn) +int rds_tcp_recv_path(struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct socket *sock = tc->t_sock; int ret = 0; - rdsdebug("recv worker conn %p tc %p sock %p\n", conn, tc, sock); + rdsdebug("recv worker path [%d] tc %p sock %p\n", + cp->cp_index, tc, sock); lock_sock(sock->sk); - ret = rds_tcp_read_sock(conn, GFP_KERNEL); + ret = rds_tcp_read_sock(cp, GFP_KERNEL); release_sock(sock->sk); return ret; @@ -313,7 +316,7 @@ void rds_tcp_data_ready(struct sock *sk) ready = tc->t_orig_data_ready; rds_tcp_stats_inc(s_tcp_data_ready_calls); - if (rds_tcp_read_sock(cp->cp_conn, GFP_ATOMIC) == -ENOMEM) + if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); out: read_unlock_bh(&sk->sk_callback_lock); diff --git a/net/rds/threads.c b/net/rds/threads.c index 9fbe95bb14a9..f717b69e03f9 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -203,7 +203,7 @@ void rds_recv_worker(struct work_struct *work) int ret; if (rds_conn_path_state(cp) == RDS_CONN_UP) { - ret = cp->cp_conn->c_trans->recv(cp->cp_conn); + ret = cp->cp_conn->c_trans->recv_path(cp); rdsdebug("conn %p ret %d\n", cp->cp_conn, ret); switch (ret) { case -EAGAIN: -- cgit v1.2.3 From b04e8554f7637999af8f54cca4dcfcf49f2ae7c8 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 30 Jun 2016 16:11:16 -0700 Subject: RDS: TCP: Hooks to set up a single connection path This patch adds ->conn_path_connect callbacks in the rds_transport that are used to set up a single connection path. Acked-by: Santosh Shilimkar Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/ib.c | 2 +- net/rds/ib.h | 2 +- net/rds/ib_cm.c | 3 ++- net/rds/loop.c | 6 +++--- net/rds/rds.h | 2 +- net/rds/tcp.c | 2 +- net/rds/tcp.h | 4 ++-- net/rds/tcp_connect.c | 11 ++++++----- net/rds/threads.c | 5 +++-- 9 files changed, 20 insertions(+), 17 deletions(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index e6ba85671004..7eaf887e46f8 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -388,7 +388,7 @@ struct rds_transport rds_ib_transport = { .recv_path = rds_ib_recv_path, .conn_alloc = rds_ib_conn_alloc, .conn_free = rds_ib_conn_free, - .conn_connect = rds_ib_conn_connect, + .conn_path_connect = rds_ib_conn_path_connect, .conn_path_shutdown = rds_ib_conn_path_shutdown, .inc_copy_to_user = rds_ib_inc_copy_to_user, .inc_free = rds_ib_inc_free, diff --git a/net/rds/ib.h b/net/rds/ib.h index 579de7e6369c..046f7508c06b 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -328,7 +328,7 @@ extern struct list_head ib_nodev_conns; /* ib_cm.c */ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp); void rds_ib_conn_free(void *arg); -int rds_ib_conn_connect(struct rds_connection *conn); +int rds_ib_conn_path_connect(struct rds_conn_path *cp); void rds_ib_conn_path_shutdown(struct rds_conn_path *cp); void rds_ib_state_change(struct sock *sk); int rds_ib_listen_init(void); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index e34ea0b5c16a..5b2ab95afa07 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -685,8 +685,9 @@ out: return ret; } -int rds_ib_conn_connect(struct rds_connection *conn) +int rds_ib_conn_path_connect(struct rds_conn_path *cp) { + struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; struct sockaddr_in src, dest; int ret; diff --git a/net/rds/loop.c b/net/rds/loop.c index 20284a4dca91..f2bf78de5688 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -150,9 +150,9 @@ static void rds_loop_conn_free(void *arg) kfree(lc); } -static int rds_loop_conn_connect(struct rds_connection *conn) +static int rds_loop_conn_path_connect(struct rds_conn_path *cp) { - rds_connect_complete(conn); + rds_connect_complete(cp->cp_conn); return 0; } @@ -188,7 +188,7 @@ struct rds_transport rds_loop_transport = { .recv_path = rds_loop_recv_path, .conn_alloc = rds_loop_conn_alloc, .conn_free = rds_loop_conn_free, - .conn_connect = rds_loop_conn_connect, + .conn_path_connect = rds_loop_conn_path_connect, .conn_path_shutdown = rds_loop_conn_path_shutdown, .inc_copy_to_user = rds_message_inc_copy_to_user, .inc_free = rds_loop_inc_free, diff --git a/net/rds/rds.h b/net/rds/rds.h index 0faca3011370..6ef07bd27227 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -454,7 +454,7 @@ struct rds_transport { int (*laddr_check)(struct net *net, __be32 addr); int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp); void (*conn_free)(void *data); - int (*conn_connect)(struct rds_connection *conn); + int (*conn_path_connect)(struct rds_conn_path *cp); void (*conn_path_shutdown)(struct rds_conn_path *conn); void (*xmit_path_prepare)(struct rds_conn_path *cp); void (*xmit_path_complete)(struct rds_conn_path *cp); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 7bc136c66dbe..d278432f080b 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -362,7 +362,7 @@ struct rds_transport rds_tcp_transport = { .recv_path = rds_tcp_recv_path, .conn_alloc = rds_tcp_conn_alloc, .conn_free = rds_tcp_conn_free, - .conn_connect = rds_tcp_conn_connect, + .conn_path_connect = rds_tcp_conn_path_connect, .conn_path_shutdown = rds_tcp_conn_path_shutdown, .inc_copy_to_user = rds_tcp_inc_copy_to_user, .inc_free = rds_tcp_inc_free, diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 5a5f91abe1de..1c3160faa963 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -13,7 +13,7 @@ struct rds_tcp_connection { struct list_head t_tcp_node; struct rds_conn_path *t_cpath; /* t_conn_path_lock synchronizes the connection establishment between - * rds_tcp_accept_one and rds_tcp_conn_connect + * rds_tcp_accept_one and rds_tcp_conn_path_connect */ struct mutex t_conn_path_lock; struct socket *t_sock; @@ -60,7 +60,7 @@ extern struct rds_transport rds_tcp_transport; void rds_tcp_accept_work(struct sock *sk); /* tcp_connect.c */ -int rds_tcp_conn_connect(struct rds_connection *conn); +int rds_tcp_conn_path_connect(struct rds_conn_path *cp); void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn); void rds_tcp_state_change(struct sock *sk); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 7eddce50e7a3..c916715fbe61 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -74,17 +74,17 @@ out: state_change(sk); } -int rds_tcp_conn_connect(struct rds_connection *conn) +int rds_tcp_conn_path_connect(struct rds_conn_path *cp) { struct socket *sock = NULL; struct sockaddr_in src, dest; int ret; - struct rds_tcp_connection *tc = conn->c_transport_data; - struct rds_conn_path *cp = &conn->c_path[0]; + struct rds_connection *conn = cp->cp_conn; + struct rds_tcp_connection *tc = cp->cp_transport_data; mutex_lock(&tc->t_conn_path_lock); - if (rds_conn_up(conn)) { + if (rds_conn_path_up(cp)) { mutex_unlock(&tc->t_conn_path_lock); return 0; } @@ -118,6 +118,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn) ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest), O_NONBLOCK); + cp->cp_outgoing = 1; rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret); if (ret == -EINPROGRESS) ret = 0; @@ -125,7 +126,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn) rds_tcp_keepalive(sock); sock = NULL; } else { - rds_tcp_restore_callbacks(sock, conn->c_transport_data); + rds_tcp_restore_callbacks(sock, cp->cp_transport_data); } out: diff --git a/net/rds/threads.c b/net/rds/threads.c index f717b69e03f9..e8f0941f0548 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -152,8 +152,9 @@ void rds_connect_worker(struct work_struct *work) int ret; clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); - if (rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { - ret = conn->c_trans->conn_connect(conn); + ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING); + if (ret) { + ret = conn->c_trans->conn_path_connect(cp); rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n", conn, &conn->c_laddr, &conn->c_faddr, ret); -- cgit v1.2.3 From 8315011ad67670691545ed394968435f0a0bb29e Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 30 Jun 2016 16:11:17 -0700 Subject: RDS: TCP: Simplify reconnect to avoid duelling reconnnect attempts When reconnecting, the peer with the smaller IP address will initiate the reconnect, to avoid needless duelling SYN issues. Acked-by: Santosh Shilimkar Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/connection.c | 4 +--- net/rds/threads.c | 5 +++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 1b0c2a783b5e..19a4fee5f4dd 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -355,9 +355,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp) rcu_read_lock(); if (!hlist_unhashed(&conn->c_hash_node)) { rcu_read_unlock(); - if (conn->c_trans->t_type != RDS_TRANS_TCP || - cp->cp_outgoing == 1) - rds_queue_reconnect(cp); + rds_queue_reconnect(cp); } else { rcu_read_unlock(); } diff --git a/net/rds/threads.c b/net/rds/threads.c index e8f0941f0548..bc97d67f29cc 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -125,6 +125,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp) conn, &conn->c_laddr, &conn->c_faddr, cp->cp_reconnect_jiffies); + /* let peer with smaller addr initiate reconnect, to avoid duels */ + if (conn->c_trans->t_type == RDS_TRANS_TCP && + conn->c_laddr > conn->c_faddr) + return; + set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); if (cp->cp_reconnect_jiffies == 0) { cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; -- cgit v1.2.3 From 11bb62f7c05240a933dd2e6b3bf3871d99464524 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 30 Jun 2016 16:11:18 -0700 Subject: RDS: Do not send a pong to an incoming ping with 0 src port RDS ping messages are sent with a non-zero src port to a zero dst port, so that the rds pong messages can be sent back to the originators src port. However if a confused/malicious sender sends a ping with a 0 src port, we'd have an infinite ping-pong loop. To avoid this, the receiver should ignore ping messages with a 0 src port. Acked-by: Santosh Shilimkar Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/recv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/rds/recv.c b/net/rds/recv.c index b58f50571782..fed53a6c2890 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -226,6 +226,10 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, cp->cp_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1; if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) { + if (inc->i_hdr.h_sport == 0) { + rdsdebug("ignore ping with 0 sport from 0x%x\n", saddr); + goto out; + } rds_stats_inc(s_recv_ping); rds_send_pong(cp, inc->i_hdr.h_sport); goto out; -- cgit v1.2.3