summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-12-09 19:31:58 -0800
committerDavid S. Miller <davem@davemloft.net>2020-12-09 19:31:58 -0800
commit0f86a5be10956aaa77f92d9c25ec508c5495e8af (patch)
tree4835210f79ccb092abb8aff17daecae4f21acd38
parent7bdddc68cda3e0c9ef7711238206881cf016bdbb (diff)
parentd7b1bfd0832c1d005f571203306b6c50e9805150 (diff)
Merge branch 'mptcp-fixes'
Paolo Abeni says: ==================== mptcp: a bunch of fixes This series includes a few fixes following-up the recent code refactor for the MPTCP RX and TX paths. Boundling them together, since the fixes are somewhat related. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/mptcp/options.c7
-rw-r--r--net/mptcp/pm.c8
-rw-r--r--net/mptcp/protocol.c23
-rw-r--r--net/mptcp/protocol.h1
-rw-r--r--net/mptcp/subflow.c19
5 files changed, 48 insertions, 10 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index d1b4c5d208a9..1ca60d9da3ef 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -802,7 +802,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
mptcp_subflow_fully_established(subflow, mp_opt);
fully_established:
- if (likely(subflow->pm_notified))
+ /* if the subflow is not already linked into the conn_list, we can't
+ * notify the PM: this subflow is still on the listener queue
+ * and the PM possibly acquiring the subflow lock could race with
+ * the listener close
+ */
+ if (likely(subflow->pm_notified) || list_empty(&subflow->node))
return true;
subflow->pm_notified = 1;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 9256bd5d02ed..da2ed576f289 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -126,8 +126,14 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk)
spin_lock_bh(&pm->lock);
- if (READ_ONCE(pm->work_pending))
+ /* mptcp_pm_fully_established() can be invoked by multiple
+ * racing paths - accept() and check_fully_established()
+ * be sure to serve this event only once.
+ */
+ if (READ_ONCE(pm->work_pending) &&
+ !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
+ msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
spin_unlock_bh(&pm->lock);
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 57213ff60f78..2540d82742ac 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -701,6 +701,13 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
int sk_rbuf, ssk_rbuf;
bool wake;
+ /* The peer can send data while we are shutting down this
+ * subflow at msk destruction time, but we must avoid enqueuing
+ * more data to the msk receive queue
+ */
+ if (unlikely(subflow->disposable))
+ return;
+
/* move_skbs_to_msk below can legitly clear the data_avail flag,
* but we will need later to properly woke the reader, cache its
* value
@@ -2119,6 +2126,8 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
sock_orphan(ssk);
}
+ subflow->disposable = 1;
+
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
* the ssk has been already destroyed, we just need to release the
* reference owned by msk;
@@ -2126,8 +2135,7 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (!inet_csk(ssk)->icsk_ulp_ops) {
kfree_rcu(subflow, rcu);
} else {
- /* otherwise ask tcp do dispose of ssk and subflow ctx */
- subflow->disposable = 1;
+ /* otherwise tcp will dispose of the ssk and subflow ctx */
__tcp_close(ssk, 0);
/* close acquired an extra ref */
@@ -3208,6 +3216,17 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
bool slowpath;
slowpath = lock_sock_fast(newsk);
+
+ /* PM/worker can now acquire the first subflow socket
+ * lock without racing with listener queue cleanup,
+ * we can notify it, if needed.
+ */
+ subflow = mptcp_subflow_ctx(msk->first);
+ list_add(&subflow->node, &msk->conn_list);
+ sock_hold(msk->first);
+ if (mptcp_is_fully_established(newsk))
+ mptcp_pm_fully_established(msk);
+
mptcp_copy_inaddrs(newsk, msk->first);
mptcp_rcv_space_init(msk, msk->first);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 9ff6fd486db6..f6c3c686a34a 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -165,6 +165,7 @@ enum mptcp_pm_status {
MPTCP_PM_ADD_ADDR_SEND_ACK,
MPTCP_PM_RM_ADDR_RECEIVED,
MPTCP_PM_ESTABLISHED,
+ MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
MPTCP_PM_SUBFLOW_ESTABLISHED,
};
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 5f5815a1665f..fefcaf497938 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -614,8 +614,9 @@ create_child:
*/
inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
- /* link the newly created socket to the msk */
- mptcp_add_pending_subflow(mptcp_sk(new_msk), ctx);
+ /* record the newly created socket as the first msk
+ * subflow, but don't link it yet into conn_list
+ */
WRITE_ONCE(mptcp_sk(new_msk)->first, child);
/* new mpc subflow takes ownership of the newly
@@ -1148,13 +1149,18 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
mptcp_info2sockaddr(remote, &addr);
+ mptcp_add_pending_subflow(msk, subflow);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
if (err && err != -EINPROGRESS)
- goto failed;
+ goto failed_unlink;
- mptcp_add_pending_subflow(msk, subflow);
return err;
+failed_unlink:
+ spin_lock_bh(&msk->join_list_lock);
+ list_del(&subflow->node);
+ spin_unlock_bh(&msk->join_list_lock);
+
failed:
subflow->disposable = 1;
sock_release(sf);
@@ -1333,9 +1339,10 @@ static void subflow_ulp_release(struct sock *ssk)
sk = ctx->conn;
if (sk) {
/* if the msk has been orphaned, keep the ctx
- * alive, will be freed by mptcp_done()
+ * alive, will be freed by __mptcp_close_ssk(),
+ * when the subflow is still unaccepted
*/
- release = ctx->disposable;
+ release = ctx->disposable || list_empty(&ctx->node);
sock_put(sk);
}