From 31954cd8bb667030b1c0d3d77f28fe71f06999f9 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 25 Jan 2019 10:53:19 -0800 Subject: tcp: Refactor pingpong code Instead of using pingpong as a single bit information, we refactor the code to treat it as a counter. When interactive session is detected, we set pingpong count to TCP_PINGPONG_THRESH. And when pingpong count is >= TCP_PINGPONG_THRESH, we consider the session in pingpong mode. This patch is a pure refactor and sets foundation for the next patch. This patch itself does not change any pingpong logic. Signed-off-by: Wei Wang Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 17 +++++++++++++++++ net/dccp/input.c | 2 +- net/dccp/timer.c | 4 ++-- net/ipv4/tcp.c | 10 +++++----- net/ipv4/tcp_input.c | 8 ++++---- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_output.c | 4 ++-- net/ipv4/tcp_timer.c | 4 ++-- net/ipv6/tcp_ipv6.c | 2 +- 9 files changed, 35 insertions(+), 18 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index fe0d9b44d6fc..179609d1d1ea 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -314,4 +314,21 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); + +#define TCP_PINGPONG_THRESH 1 + +static inline void inet_csk_enter_pingpong_mode(struct sock *sk) +{ + inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH; +} + +static inline void inet_csk_exit_pingpong_mode(struct sock *sk) +{ + inet_csk(sk)->icsk_ack.pingpong = 0; +} + +static inline bool inet_csk_in_pingpong_mode(struct sock *sk) +{ + return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; +} #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 85d6c879383d..8d03707abdac 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -480,7 +480,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); } - if (sk->sk_write_pending || icsk->icsk_ack.pingpong || + if (sk->sk_write_pending || inet_csk_in_pingpong_mode(sk) || icsk->icsk_accept_queue.rskq_defer_accept) { /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 1501a20a94ca..74e138495d67 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -199,7 +199,7 @@ static void dccp_delack_timer(struct timer_list *t) icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; if (inet_csk_ack_scheduled(sk)) { - if (!icsk->icsk_ack.pingpong) { + if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); @@ -207,7 +207,7 @@ static void dccp_delack_timer(struct timer_list *t) /* Delayed ACK missed: leave pingpong mode and * deflate ATO. */ - icsk->icsk_ack.pingpong = 0; + inet_csk_exit_pingpong_mode(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; } dccp_send_ack(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 12ba21433dd0..6f8d292ad501 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1551,7 +1551,7 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied) (copied > 0 && ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && - !icsk->icsk_ack.pingpong)) && + !inet_csk_in_pingpong_mode(sk))) && !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = true; } @@ -2984,16 +2984,16 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_QUICKACK: if (!val) { - icsk->icsk_ack.pingpong = 1; + inet_csk_enter_pingpong_mode(sk); } else { - icsk->icsk_ack.pingpong = 0; + inet_csk_exit_pingpong_mode(sk); if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && inet_csk_ack_scheduled(sk)) { icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; tcp_cleanup_rbuf(sk, 1); if (!(val & 1)) - icsk->icsk_ack.pingpong = 1; + inet_csk_enter_pingpong_mode(sk); } } break; @@ -3407,7 +3407,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return 0; } case TCP_QUICKACK: - val = !icsk->icsk_ack.pingpong; + val = !inet_csk_in_pingpong_mode(sk); break; case TCP_CONGESTION: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 76858b14ebe9..7a027dec649b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -221,7 +221,7 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) struct inet_connection_sock *icsk = inet_csk(sk); tcp_incr_quickack(sk, max_quickacks); - icsk->icsk_ack.pingpong = 0; + inet_csk_exit_pingpong_mode(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; } EXPORT_SYMBOL(tcp_enter_quickack_mode); @@ -236,7 +236,7 @@ static bool tcp_in_quickack_mode(struct sock *sk) const struct dst_entry *dst = __sk_dst_get(sk); return (dst && dst_metric(dst, RTAX_QUICKACK)) || - (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong); + (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk)); } static void tcp_ecn_queue_cwr(struct tcp_sock *tp) @@ -4094,7 +4094,7 @@ void tcp_fin(struct sock *sk) case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); - inet_csk(sk)->icsk_ack.pingpong = 1; + inet_csk_enter_pingpong_mode(sk); break; case TCP_CLOSE_WAIT: @@ -5889,7 +5889,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, return -1; if (sk->sk_write_pending || icsk->icsk_accept_queue.rskq_defer_accept || - icsk->icsk_ack.pingpong) { + inet_csk_in_pingpong_mode(sk)) { /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. * diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index efc6fef692ff..662b034f1795 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2437,7 +2437,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) refcount_read(&sk->sk_refcnt), sk, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), - (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk), tp->snd_cwnd, state == TCP_LISTEN ? fastopenq->max_qlen : diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 26a2948dca95..06228e2d010e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -171,7 +171,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, * packet, enter pingpong mode. */ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - icsk->icsk_ack.pingpong = 1; + inet_csk_enter_pingpong_mode(sk); } /* Account for an ACK we sent. */ @@ -3569,7 +3569,7 @@ void tcp_send_delayed_ack(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); int max_ato = HZ / 2; - if (icsk->icsk_ack.pingpong || + if (inet_csk_in_pingpong_mode(sk) || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) max_ato = TCP_DELACK_MAX; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d7399a89469d..f0c86398e6a7 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -277,14 +277,14 @@ void tcp_delack_timer_handler(struct sock *sk) icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; if (inet_csk_ack_scheduled(sk)) { - if (!icsk->icsk_ack.pingpong) { + if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. */ - icsk->icsk_ack.pingpong = 0; + inet_csk_exit_pingpong_mode(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; } tcp_mstamp_refresh(tcp_sk(sk)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b81eb7cb815e..e51cda79f0cc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1864,7 +1864,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) refcount_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), - (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), tp->snd_cwnd, state == TCP_LISTEN ? fastopenq->max_qlen : -- cgit v1.2.3 From 4a41f453bedfd5e9cd040bad509d9da49feb3e2c Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 25 Jan 2019 10:53:20 -0800 Subject: tcp: change pingpong threshold to 3 In order to be more confident about an on-going interactive session, we increment pingpong count by 1 for every interactive transaction and we adjust TCP_PINGPONG_THRESH to 3. This means, we only consider a session in pingpong mode after we see 3 interactive transactions, and start to activate delayed acks in quick ack mode. And in order to not over-count the credits, we only increase pingpong count for the first packet sent in response for the previous received packet. This is mainly to prevent delaying the ack immediately after some handshake protocol but no real interactive traffic pattern afterwards. Signed-off-by: Wei Wang Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 10 +++++++++- net/ipv4/tcp_output.c | 15 +++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 179609d1d1ea..ff40e1d08157 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -315,7 +315,7 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); -#define TCP_PINGPONG_THRESH 1 +#define TCP_PINGPONG_THRESH 3 static inline void inet_csk_enter_pingpong_mode(struct sock *sk) { @@ -331,4 +331,12 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk) { return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; } + +static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ack.pingpong < U8_MAX) + icsk->icsk_ack.pingpong++; +} #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 06228e2d010e..96bdb8eae9bb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -165,13 +165,16 @@ static void tcp_event_data_sent(struct tcp_sock *tp, if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); - tp->lsndtime = now; - - /* If it is a reply for ato after last received - * packet, enter pingpong mode. + /* If this is the first data packet sent in response to the + * previous received data, + * and it is a reply for ato after last received packet, + * increase pingpong count. */ - if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - inet_csk_enter_pingpong_mode(sk); + if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) && + (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) + inet_csk_inc_pingpong_cnt(sk); + + tp->lsndtime = now; } /* Account for an ACK we sent. */ -- cgit v1.2.3