From ed43594aede9719e56eca72fc6a9a200c60b60e6 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 8 Aug 2017 22:23:56 +0200 Subject: tipc: remove premature ESTABLISH FSM event at link synchronization When a link between two nodes come up, both endpoints will initially send out a STATE message to the peer, to increase the probability that the peer endpoint also is up when the first traffic message arrives. Thereafter, if the establishing link is the second link between two nodes, this first "traffic" message is a TUNNEL_PROTOCOL/SYNCH message, helping the peer to perform initial synchronization between the two links. However, the initial STATE message may be lost, in which case the SYNCH message will be the first one arriving at the peer. This should also work, as the SYNCH message itself will be used to take up the link endpoint before initializing synchronization. Unfortunately the code for this case is broken. Currently, the link is brought up through a tipc_link_fsm_evt(ESTABLISHED) when a SYNCH arrives, whereupon __tipc_node_link_up() is called to distribute the link slots and take the link into traffic. But, __tipc_node_link_up() is itself starting with a test for whether the link is up, and if true, returns without action. Clearly, the tipc_link_fsm_evt(ESTABLISHED) call is unnecessary, since tipc_node_link_up() is itself issuing such an event, but also harmful, since it inhibits tipc_node_link_up() to perform the test of its tasks, and the link endpoint in question hence is never taken into traffic. This problem has been exposed when we set up dual links between pre- and post-4.4 kernels, because the former ones don't send out the initial STATE message described above. We fix this by removing the unnecessary event call. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index aeef8011ac7d..9b4dcb6a16b5 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1455,10 +1455,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, /* Initiate synch mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) { syncpt = iseqno + exp_pkts - 1; - if (!tipc_link_is_up(l)) { - tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); + if (!tipc_link_is_up(l)) __tipc_node_link_up(n, bearer_id, xmitq); - } if (n->state == SELF_UP_PEER_UP) { n->sync_point = syncpt; tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT); -- cgit v1.2.3 From 50ddfbafcd1e1f95185dfaa4ad794f742b0beaf8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 8 Aug 2017 14:45:09 -0700 Subject: net: systemport: Fix software statistics for SYSTEMPORT Lite With SYSTEMPORT Lite we have holes in our statistics layout that make us skip over the hardware MIB counters, bcm_sysport_get_stats() was not taking that into account resulting in reporting 0 for all SW-maintained statistics, fix this by skipping accordingly. Fixes: 44a4524c54af ("net: systemport: Add support for SYSTEMPORT Lite") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 5333601f855f..dc3052751bc1 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -449,6 +449,10 @@ static void bcm_sysport_get_stats(struct net_device *dev, p = (char *)&dev->stats; else p = (char *)priv; + + if (priv->is_lite && !bcm_sysport_lite_stat_valid(s->type)) + continue; + p += s->stat_offset; data[j] = *(unsigned long *)p; j++; -- cgit v1.2.3 From 04db70d9fe7019d96118158fbaaccb4959ba2bd4 Mon Sep 17 00:00:00 2001 From: Girish Moodalbail Date: Tue, 8 Aug 2017 17:26:24 -0700 Subject: geneve: maximum value of VNI cannot be used Geneve's Virtual Network Identifier (VNI) is 24 bit long, so the range of values for it would be from 0 to 16777215 (2^24 -1). However, one cannot create a geneve device with VNI set to 16777215. This patch fixes this issue. Signed-off-by: Girish Moodalbail Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index de8156c6b292..2bbda71818ad 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1091,7 +1091,7 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], if (data[IFLA_GENEVE_ID]) { __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); - if (vni >= GENEVE_VID_MASK) + if (vni >= GENEVE_N_VID) return -ERANGE; } -- cgit v1.2.3 From 1714020e42b17135032c8606f7185b3fb2ba5d78 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 9 Aug 2017 14:38:04 +0300 Subject: igmp: Fix regression caused by igmp sysctl namespace code. Commit dcd87999d415 ("igmp: net: Move igmp namespace init to correct file") moved the igmp sysctls initialization from tcp_sk_init to igmp_net_init. This function is only called as part of per-namespace initialization, only if CONFIG_IP_MULTICAST is defined, otherwise igmp_mc_init() call in ip_init is compiled out, casuing the igmp pernet ops to not be registerd and those sysctl being left initialized with 0. However, there are certain functions, such as ip_mc_join_group which are always compiled and make use of some of those sysctls. Let's do a partial revert of the aforementioned commit and move the sysctl initialization into inet_init_net, that way they will always have sane values. Fixes: dcd87999d415 ("igmp: net: Move igmp namespace init to correct file") Link: https://bugzilla.kernel.org/show_bug.cgi?id=196595 Reported-by: Gerardo Exequiel Pozzi Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 7 +++++++ net/ipv4/igmp.c | 6 ------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 76c2077c3f5b..2e548eca3489 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1731,6 +1731,13 @@ static __net_init int inet_init_net(struct net *net) net->ipv4.sysctl_ip_prot_sock = PROT_SOCK; #endif + /* Some igmp sysctl, whose values are always used */ + net->ipv4.sysctl_igmp_max_memberships = 20; + net->ipv4.sysctl_igmp_max_msf = 10; + /* IGMP reports for link-local multicast groups are enabled by default */ + net->ipv4.sysctl_igmp_llm_reports = 1; + net->ipv4.sysctl_igmp_qrv = 2; + return 0; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 28f14afd0dd3..498706b072fb 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2974,12 +2974,6 @@ static int __net_init igmp_net_init(struct net *net) goto out_sock; } - /* Sysctl initialization */ - net->ipv4.sysctl_igmp_max_memberships = 20; - net->ipv4.sysctl_igmp_max_msf = 10; - /* IGMP reports for link-local multicast groups are enabled by default */ - net->ipv4.sysctl_igmp_llm_reports = 1; - net->ipv4.sysctl_igmp_qrv = 2; return 0; out_sock: -- cgit v1.2.3 From 96d9703050a0036a3360ec98bb41e107c90664fe Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 9 Aug 2017 18:15:19 +0800 Subject: net: sched: set xt_tgchk_param par.nft_compat as 0 in ipt_init_target Commit 55917a21d0cc ("netfilter: x_tables: add context to know if extension runs from nft_compat") introduced a member nft_compat to xt_tgchk_param structure. But it didn't set it's value for ipt_init_target. With unexpected value in par.nft_compat, it may return unexpected result in some target's checkentry. This patch is to set all it's fields as 0 and only initialize the non-zero fields in ipt_init_target. v1->v2: As Wang Cong's suggestion, fix it by setting all it's fields as 0 and only initializing the non-zero fields. Fixes: 55917a21d0cc ("netfilter: x_tables: add context to know if extension runs from nft_compat") Suggested-by: Cong Wang Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sched/act_ipt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 94ba5cfab860..d516ba8178b8 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -49,9 +49,9 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t, return PTR_ERR(target); t->u.kernel.target = target; + memset(&par, 0, sizeof(par)); par.net = net; par.table = table; - par.entryinfo = NULL; par.target = target; par.targinfo = t->data; par.hook_mask = hook; -- cgit v1.2.3 From 85f1bd9a7b5a79d5baa8bf44af19658f7bf77bfa Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 10 Aug 2017 12:29:19 -0400 Subject: udp: consistently apply ufo or fragmentation When iteratively building a UDP datagram with MSG_MORE and that datagram exceeds MTU, consistently choose UFO or fragmentation. Once skb_is_gso, always apply ufo. Conversely, once a datagram is split across multiple skbs, do not consider ufo. Sendpage already maintains the first invariant, only add the second. IPv6 does not have a sendpage implementation to modify. A gso skb must have a partial checksum, do not follow sk_no_check_tx in udp_send_skb. Found by syzkaller. Fixes: e89e9cf539a2 ("[IPv4/IPv6]: UFO Scatter-gather approach") Reported-by: Andrey Konovalov Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 8 +++++--- net/ipv4/udp.c | 2 +- net/ipv6/ip6_output.c | 7 ++++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 50c74cd890bc..e153c40c2436 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -965,11 +965,12 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) || - (skb && skb_is_gso(skb))) && + if ((skb && skb_is_gso(skb)) || + (((length + (skb ? skb->len : fragheaderlen)) > mtu) && + (skb_queue_len(queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { + (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); @@ -1288,6 +1289,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, return -EINVAL; if ((size + skb->len > mtu) && + (skb_queue_len(&sk->sk_write_queue) == 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { if (skb->ip_summed != CHECKSUM_PARTIAL) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e6276fa3750b..a7c804f73990 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -802,7 +802,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); - else if (sk->sk_no_check_tx) { /* UDP csum disabled */ + else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */ skb->ip_summed = CHECKSUM_NONE; goto send; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 162efba0d0cd..2dfe50d8d609 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1381,11 +1381,12 @@ emsgsize: */ cork->length += length; - if ((((length + (skb ? skb->len : headersize)) > mtu) || - (skb && skb_is_gso(skb))) && + if ((skb && skb_is_gso(skb)) || + (((length + (skb ? skb->len : headersize)) > mtu) && + (skb_queue_len(queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { + (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, exthdrlen, transhdrlen, mtu, flags, fl6); -- cgit v1.2.3 From c27927e372f0785f3303e8fad94b85945e2c97b7 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 10 Aug 2017 12:41:58 -0400 Subject: packet: fix tp_reserve race in packet_set_ring Updates to tp_reserve can race with reads of the field in packet_set_ring. Avoid this by holding the socket lock during updates in setsockopt PACKET_RESERVE. This bug was discovered by syzkaller. Fixes: 8913336a7e8d ("packet: add PACKET_RESERVE sockopt") Reported-by: Andrey Konovalov Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0615c2a950fa..008a45ca3112 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3700,14 +3700,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; if (val > INT_MAX) return -EINVAL; - po->tp_reserve = val; - return 0; + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_reserve = val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_LOSS: { -- cgit v1.2.3