From 0ce294d88457bccd7f9991f883fec80022a1ddbd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 7 Dec 2017 11:33:30 -0800 Subject: tcp: correctly test congestion state in RACK RACK does not test the loss recovery state correctly to compute the reordering window. It assumes if lost_out is zero then TCP is not in loss recovery. But it can be zero during recovery before calling tcp_rack_detect_loss(): when an ACK acknowledges all packets marked lost before receiving this ACK, but has not yet to discover new ones by tcp_rack_detect_loss(). The fix is to simply test the congestion state directly. Signed-off-by: Yuchung Cheng Reviewed-by: Neal Cardwell Reviewed-by: Priyaranjan Jha Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_recovery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index d3ea89020c69..3143664902e9 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -55,7 +55,8 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) * to queuing or delayed ACKs. */ reo_wnd = 1000; - if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) { + if ((tp->rack.reord || inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery) && + min_rtt != ~0U) { reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd); reo_wnd = min(reo_wnd, tp->srtt_us >> 3); } -- cgit v1.2.3 From cd1fc85b4399d47e3d6626301741ba8c38cd475a Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 7 Dec 2017 11:33:31 -0800 Subject: tcp: always evaluate losses in RACK upon undo When sender detects spurious retransmission, all packets marked lost are remarked to be in-flight. However some may be considered lost based on its timestamps in RACK. This patch forces RACK to re-evaluate, which may be skipped previously if the ACK does not advance RACK timestamp. Signed-off-by: Yuchung Cheng Reviewed-by: Neal Cardwell Reviewed-by: Priyaranjan Jha Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 075c559570e6..9550cc42de2d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2329,6 +2329,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) } tp->snd_cwnd_stamp = tcp_jiffies32; tp->undo_marker = 0; + tp->rack.advanced = 1; /* Force RACK to re-exam losses */ } static inline bool tcp_may_undo(const struct tcp_sock *tp) -- cgit v1.2.3 From 428aec5e69fa17d223e1495f395833c50770f7ae Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 7 Dec 2017 11:33:32 -0800 Subject: tcp: fix off-by-one bug in RACK RACK should mark a packet lost when remaining wait time is zero. Signed-off-by: Yuchung Cheng Reviewed-by: Neal Cardwell Reviewed-by: Priyaranjan Jha Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_recovery.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 3143664902e9..0c182303e62e 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -80,12 +80,12 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) */ remaining = tp->rack.rtt_us + reo_wnd - tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp); - if (remaining < 0) { + if (remaining <= 0) { tcp_rack_mark_skb_lost(sk, skb); list_del_init(&skb->tcp_tsorted_anchor); } else { - /* Record maximum wait time (+1 to avoid 0) */ - *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining); + /* Record maximum wait time */ + *reo_timeout = max_t(u32, *reo_timeout, remaining); } } } -- cgit v1.2.3 From 6065fd0d179b96ddc488c76542349bcb148a95fd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 7 Dec 2017 11:33:33 -0800 Subject: tcp: evaluate packet losses upon RTT change RACK skips an ACK unless it advances the most recently delivered TX timestamp (rack.mstamp). Since RACK also uses the most recent RTT to decide if a packet is lost, RACK should still run the loss detection whenever the most recent RTT changes. For example, an ACK that does not advance the timestamp but triggers the cwnd undo due to reordering, would then use the most recent (higher) RTT measurement to detect further losses. Signed-off-by: Yuchung Cheng Reviewed-by: Neal Cardwell Reviewed-by: Priyaranjan Jha Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_recovery.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 0c182303e62e..3a81720ac0c4 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -117,13 +117,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, { u32 rtt_us; - if (tp->rack.mstamp && - !tcp_rack_sent_after(xmit_time, tp->rack.mstamp, - end_seq, tp->rack.end_seq)) - return; - rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); - if (sacked & TCPCB_RETRANS) { + if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) { /* If the sacked packet was retransmitted, it's ambiguous * whether the retransmission or the original (or the prior * retransmission) was sacked. @@ -134,13 +129,15 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, * so it's at least one RTT (i.e., retransmission is at least * an RTT later). */ - if (rtt_us < tcp_min_rtt(tp)) - return; + return; } - tp->rack.rtt_us = rtt_us; - tp->rack.mstamp = xmit_time; - tp->rack.end_seq = end_seq; tp->rack.advanced = 1; + tp->rack.rtt_us = rtt_us; + if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp, + end_seq, tp->rack.end_seq)) { + tp->rack.mstamp = xmit_time; + tp->rack.end_seq = end_seq; + } } /* We have waited long enough to accommodate reordering. Mark the expired -- cgit v1.2.3