From 955d3411a17f590364238bd0d3329b61f20c1cd2 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 30 Dec 2018 12:46:01 +0100 Subject: batman-adv: Avoid WARN on net_device without parent in netns It is not allowed to use WARN* helpers on potential incorrect input from the user or transient problems because systems configured as panic_on_warn will reboot due to such a problem. A NULL return value of __dev_get_by_index can be caused by various problems which can either be related to the system configuration or problems (incorrectly returned network namespaces) in other (virtual) net_device drivers. batman-adv should not cause a (harmful) WARN in this situation and instead only report it via a simple message. Fixes: b7eddd0b3950 ("batman-adv: prevent using any virtual device created on batman-adv as hard-interface") Reported-by: syzbot+c764de0fcfadca9a8595@syzkaller.appspotmail.com Reported-by: Dmitry Vyukov Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/hard-interface.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 508f4416dfc9..415d494cbe22 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -20,7 +20,6 @@ #include "main.h" #include -#include #include #include #include @@ -179,8 +178,10 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) parent_dev = __dev_get_by_index((struct net *)parent_net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ - if (WARN(!parent_dev, "Cannot find parent device")) + if (!parent_dev) { + pr_err("Cannot find parent device\n"); return false; + } if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; -- cgit v1.2.3 From 9114daa825fc3f335f9bea3313ce667090187280 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 31 Dec 2018 22:31:01 +0100 Subject: batman-adv: Force mac header to start of data on xmit The caller of ndo_start_xmit may not already have called skb_reset_mac_header. The returned value of skb_mac_header/eth_hdr therefore can be in the wrong position and even outside the current skbuff. This for example happens when the user binds to the device using a PF_PACKET-SOCK_RAW with enabled qdisc-bypass: int opt = 4; setsockopt(sock, SOL_PACKET, PACKET_QDISC_BYPASS, &opt, sizeof(opt)); Since eth_hdr is used all over the codebase, the batadv_interface_tx function must always take care of resetting it. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Reported-by: syzbot+9d7405c7faa390e60b4e@syzkaller.appspotmail.com Reported-by: syzbot+7d20bc3f1ddddc0f9079@syzkaller.appspotmail.com Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/soft-interface.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5db5a0a4c959..b85ca809e509 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -221,6 +221,8 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, netif_trans_update(soft_iface); vid = batadv_get_vid(skb, 0); + + skb_reset_mac_header(skb); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { -- cgit v1.2.3 From c9e4576743eeda8d24dedc164d65b78877f9a98c Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 23 Jan 2019 12:37:19 +0800 Subject: bpf: sock recvbuff must be limited by rmem_max in bpf_setsockopt() When sock recvbuff is set by bpf_setsockopt(), the value must by limited by rmem_max. It is the same with sendbuff. Fixes: 8c4b4c7e9ff0 ("bpf: Add setsockopt helper function to bpf") Signed-off-by: Yafang Shao Acked-by: Martin KaFai Lau Acked-by: Lawrence Brakmo Signed-off-by: Daniel Borkmann --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 7559d6835ecb..7a54dc11ac2d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4112,10 +4112,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: + val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_SNDBUF: + val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break; -- cgit v1.2.3 From 7d652669b61d702c6e62a39579d17f6881670ab6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 25 Jan 2019 08:21:26 +0100 Subject: batman-adv: release station info tidstats With the addition of TXQ stats in the per-tid statistics the struct station_info grew significantly. This resulted in stack size warnings due to the structure itself being above the limit for the warnings. To work around this, the TID array was allocated dynamically. Also a function to free this content was introduced with commit 7ea3e110f2f8 ("cfg80211: release station info tidstats where needed") but the necessary changes were not provided for batman-adv's B.A.T.M.A.N. V implementation. Signed-off-by: Felix Fietkau Fixes: 8689c051a201 ("cfg80211: dynamically allocate per-tid stats for station info") [sven@narfation.org: add commit message] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index e8090f099eb8..ef0dec20c7d8 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -104,6 +104,9 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo); + /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + dev_put(real_netdev); if (ret == -ENOENT) { /* Node is not associated anymore! It would be -- cgit v1.2.3 From 1d79895aef18fa05789995d86d523c9b2ee58a02 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 28 Jan 2019 10:13:35 +0100 Subject: sk_msg: Always cancel strp work before freeing the psock Despite having stopped the parser, we still need to deinitialize it by calling strp_done so that it cancels its work. Otherwise the worker thread can run after we have freed the parser, and attempt to access its workqueue resulting in a use-after-free: ================================================================== BUG: KASAN: use-after-free in pwq_activate_delayed_work+0x1b/0x1d0 Read of size 8 at addr ffff888069975240 by task kworker/u2:2/93 CPU: 0 PID: 93 Comm: kworker/u2:2 Not tainted 5.0.0-rc2-00335-g28f9d1a3d4fe-dirty #14 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 Workqueue: (null) (kstrp) Call Trace: print_address_description+0x6e/0x2b0 ? pwq_activate_delayed_work+0x1b/0x1d0 kasan_report+0xfd/0x177 ? pwq_activate_delayed_work+0x1b/0x1d0 ? pwq_activate_delayed_work+0x1b/0x1d0 pwq_activate_delayed_work+0x1b/0x1d0 ? process_one_work+0x4aa/0x660 pwq_dec_nr_in_flight+0x9b/0x100 worker_thread+0x82/0x680 ? process_one_work+0x660/0x660 kthread+0x1b9/0x1e0 ? __kthread_create_on_node+0x250/0x250 ret_from_fork+0x1f/0x30 Allocated by task 111: sk_psock_init+0x3c/0x1b0 sock_map_link.isra.2+0x103/0x4b0 sock_map_update_common+0x94/0x270 sock_map_update_elem+0x145/0x160 __se_sys_bpf+0x152e/0x1e10 do_syscall_64+0xb2/0x3e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 112: kfree+0x7f/0x140 process_one_work+0x40b/0x660 worker_thread+0x82/0x680 kthread+0x1b9/0x1e0 ret_from_fork+0x1f/0x30 The buggy address belongs to the object at ffff888069975180 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 192 bytes inside of 512-byte region [ffff888069975180, ffff888069975380) The buggy address belongs to the page: page:ffffea0001a65d00 count:1 mapcount:0 mapping:ffff88806d401280 index:0x0 compound_mapcount: 0 flags: 0x4000000000010200(slab|head) raw: 4000000000010200 dead000000000100 dead000000000200 ffff88806d401280 raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888069975100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888069975180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888069975200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888069975280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888069975300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Link: https://lore.kernel.org/netdev/CAJPywTLwgXNEZ2dZVoa=udiZmtrWJ0q5SuBW64aYs0Y1khXX3A@mail.gmail.com Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- net/core/skmsg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index d6d5c20d7044..8c826603bf36 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -545,8 +545,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc) struct sk_psock *psock = container_of(gc, struct sk_psock, gc); /* No sk_callback_lock since already detached. */ - if (psock->parser.enabled) - strp_done(&psock->parser.strp); + strp_done(&psock->parser.strp); cancel_work_sync(&psock->work); -- cgit v1.2.3 From 63ff03ab786ab1bc6cca01d48eacd22c95b9b3eb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jan 2019 22:43:17 +0100 Subject: Revert "socket: fix struct ifreq size in compat ioctl" This reverts commit 1cebf8f143c2 ("socket: fix struct ifreq size in compat ioctl"), it's a bugfix for another commit that I'll revert next. This is not a 'perfect' revert, I'm keeping some coding style intact rather than revert to the state with indentation errors. Cc: stable@vger.kernel.org Fixes: 1cebf8f143c2 ("socket: fix struct ifreq size in compat ioctl") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/socket.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index e89884e2197b..63b53af7379b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -941,8 +941,7 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) EXPORT_SYMBOL(dlci_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, unsigned long arg, - unsigned int ifreq_size) + unsigned int cmd, unsigned long arg) { int err; void __user *argp = (void __user *)arg; @@ -968,11 +967,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, } else { struct ifreq ifr; bool need_copyout; - if (copy_from_user(&ifr, argp, ifreq_size)) + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) return -EFAULT; err = dev_ioctl(net, cmd, &ifr, &need_copyout); if (!err && need_copyout) - if (copy_to_user(argp, &ifr, ifreq_size)) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) return -EFAULT; } return err; @@ -1071,8 +1070,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = open_related_ns(&net->ns, get_net_ns); break; default: - err = sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct ifreq)); + err = sock_do_ioctl(net, sock, cmd, arg); break; } return err; @@ -2780,8 +2778,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock, int err; set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); set_fs(old_fs); if (!err) err = compat_put_timeval(&ktv, up); @@ -2797,8 +2794,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock, int err; set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); set_fs(old_fs); if (!err) err = compat_put_timespec(&kts, up); @@ -3109,8 +3105,7 @@ static int routing_ioctl(struct net *net, struct socket *sock, } set_fs(KERNEL_DS); - ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r, - sizeof(struct compat_ifreq)); + ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); set_fs(old_fs); out: @@ -3223,8 +3218,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: case SIOCGIFNAME: - return sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct compat_ifreq)); + return sock_do_ioctl(net, sock, cmd, arg); } return -ENOIOCTLCMD; -- cgit v1.2.3 From 37ac39bdddc528c998a9f36db36937de923fdf2a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jan 2019 22:43:18 +0100 Subject: Revert "kill dev_ifsioc()" This reverts commit bf4405737f9f ("kill dev_ifsioc()"). This wasn't really unused as implied by the original commit, it still handles the copy to/from user differently, and the commit thus caused issues such as https://bugzilla.kernel.org/show_bug.cgi?id=199469 and https://bugzilla.kernel.org/show_bug.cgi?id=202273 However, deviating from a strict revert, rename dev_ifsioc() to compat_ifreq_ioctl() to be clearer as to its purpose and add a comment. Cc: stable@vger.kernel.org Fixes: bf4405737f9f ("kill dev_ifsioc()") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/socket.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 63b53af7379b..fbf80f9fb057 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2990,6 +2990,53 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, return dev_ioctl(net, cmd, &ifreq, NULL); } +static int compat_ifreq_ioctl(struct net *net, struct socket *sock, + unsigned int cmd, + struct compat_ifreq __user *uifr32) +{ + struct ifreq __user *uifr; + int err; + + /* Handle the fact that while struct ifreq has the same *layout* on + * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, + * which are handled elsewhere, it still has different *size* due to + * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, + * resulting in struct ifreq being 32 and 40 bytes respectively). + * As a result, if the struct happens to be at the end of a page and + * the next page isn't readable/writable, we get a fault. To prevent + * that, copy back and forth to the full size. + */ + + uifr = compat_alloc_user_space(sizeof(*uifr)); + if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) + return -EFAULT; + + err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); + + if (!err) { + switch (cmd) { + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFMEM: + case SIOCGIFHWADDR: + case SIOCGIFINDEX: + case SIOCGIFADDR: + case SIOCGIFBRDADDR: + case SIOCGIFDSTADDR: + case SIOCGIFNETMASK: + case SIOCGIFPFLAGS: + case SIOCGIFTXQLEN: + case SIOCGMIIPHY: + case SIOCGMIIREG: + if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) + err = -EFAULT; + break; + } + } + return err; +} + static int compat_sioc_ifmap(struct net *net, unsigned int cmd, struct compat_ifreq __user *uifr32) { @@ -3209,6 +3256,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: + return compat_ifreq_ioctl(net, sock, cmd, argp); + case SIOCSARP: case SIOCGARP: case SIOCDARP: -- cgit v1.2.3 From c6c9fee35dc27362b7bac34b2fc9f5b8ace2e22c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jan 2019 22:43:19 +0100 Subject: net: socket: fix SIOCGIFNAME in compat As reported by Robert O'Callahan in https://bugzilla.kernel.org/show_bug.cgi?id=202273 reverting the previous changes in this area broke the SIOCGIFNAME ioctl in compat again (I'd previously fixed it after his previous report of breakage in https://bugzilla.kernel.org/show_bug.cgi?id=199469). This is obviously because I fixed SIOCGIFNAME more or less by accident. Fix it explicitly now by making it pass through the restored compat translation code. Cc: stable@vger.kernel.org Fixes: 4cf808e7ac32 ("kill dev_ifname32()") Reported-by: Robert O'Callahan Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index fbf80f9fb057..473ac8d7c54e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3029,6 +3029,7 @@ static int compat_ifreq_ioctl(struct net *net, struct socket *sock, case SIOCGIFTXQLEN: case SIOCGMIIPHY: case SIOCGMIIREG: + case SIOCGIFNAME: if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) err = -EFAULT; break; @@ -3252,6 +3253,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSIFTXQLEN: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCGIFNAME: case SIOCSIFNAME: case SIOCGMIIPHY: case SIOCGMIIREG: @@ -3266,7 +3268,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - case SIOCGIFNAME: return sock_do_ioctl(net, sock, cmd, arg); } -- cgit v1.2.3 From 98406133dd9cb9f195676eab540c270dceca879a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jan 2019 22:43:20 +0100 Subject: net: socket: make bond ioctls go through compat_ifreq_ioctl() Same story as before, these use struct ifreq and thus need to be read with the shorter version to not cause faults. Cc: stable@vger.kernel.org Fixes: f92d4fc95341 ("kill bond_ioctl()") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/socket.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 473ac8d7c54e..d80d87a395ea 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3258,16 +3258,16 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: return compat_ifreq_ioctl(net, sock, cmd, argp); case SIOCSARP: case SIOCGARP: case SIOCDARP: case SIOCATMARK: - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: return sock_do_ioctl(net, sock, cmd, arg); } -- cgit v1.2.3 From feaf5c796b3f0240f10d0d6d0b686715fd58a05b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 28 Jan 2019 22:23:48 +0100 Subject: net: ip_gre: always reports o_key to userspace Erspan protocol (version 1 and 2) relies on o_key to configure session id header field. However TUNNEL_KEY bit is cleared in erspan_xmit since ERSPAN protocol does not set the key field of the external GRE header and so the configured o_key is not reported to userspace. The issue can be triggered with the following reproducer: $ip link add erspan1 type erspan local 192.168.0.1 remote 192.168.0.2 \ key 1 seq erspan_ver 1 $ip link set erspan1 up $ip -d link sh erspan1 erspan1@NONE: mtu 1450 qdisc pfifo_fast state UNKNOWN mode DEFAULT link/ether 52:aa:99:95:9a:b5 brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500 erspan remote 192.168.0.2 local 192.168.0.1 ttl inherit ikey 0.0.0.1 iseq oseq erspan_index 0 Fix the issue adding TUNNEL_KEY bit to the o_flags parameter in ipgre_fill_info Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 20a64fe6254b..3978f807fa8b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1455,12 +1455,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((t->erspan_ver == 1 || t->erspan_ver == 2) && + !t->collect_md) + o_flags |= TUNNEL_KEY; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || -- cgit v1.2.3 From c706863bc8902d0c2d1a5a27ac8e1ead5d06b79d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 28 Jan 2019 22:23:49 +0100 Subject: net: ip6_gre: always reports o_key to userspace As Erspan_v4, Erspan_v6 protocol relies on o_key to configure session id header field. However TUNNEL_KEY bit is cleared in ip6erspan_tunnel_xmit since ERSPAN protocol does not set the key field of the external GRE header and so the configured o_key is not reported to userspace. The issue can be triggered with the following reproducer: $ip link add ip6erspan1 type ip6erspan local 2000::1 remote 2000::2 \ key 1 seq erspan_ver 1 $ip link set ip6erspan1 up ip -d link sh ip6erspan1 ip6erspan1@NONE: mtu 1422 qdisc noop state DOWN mode DEFAULT link/ether ba:ff:09:24:c3:0e brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500 ip6erspan remote 2000::2 local 2000::1 encaplimit 4 flowlabel 0x00000 ikey 0.0.0.1 iseq oseq Fix the issue adding TUNNEL_KEY bit to the o_flags parameter in ip6gre_fill_info Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4416368dbd49..801a9a0c217e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2098,12 +2098,17 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((p->erspan_ver == 1 || p->erspan_ver == 2) && + !p->collect_md) + o_flags |= TUNNEL_KEY; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || -- cgit v1.2.3 From ef489749aae508e6f17886775c075f12ff919fb1 Mon Sep 17 00:00:00 2001 From: Yohei Kanemaru Date: Tue, 29 Jan 2019 15:52:34 +0900 Subject: ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation skb->cb may contain data from previous layers (in an observed case IPv4 with L3 Master Device). In the observed scenario, the data in IPCB(skb)->frags was misinterpreted as IP6CB(skb)->frag_max_size, eventually caused an unexpected IPv6 fragmentation in ip6_fragment() through ip6_finish_output(). This patch clears IP6CB(skb), which potentially contains garbage data, on the SRH ip4ip6 encapsulation. Fixes: 32d99d0b6702 ("ipv6: sr: add support for ip4ip6 encapsulation") Signed-off-by: Yohei Kanemaru Signed-off-by: David S. Miller --- net/ipv6/seg6_iptunnel.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 8181ee7e1e27..ee5403cbe655 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } else { ip6_flow_hdr(hdr, 0, flowlabel); hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); + + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); } hdr->nexthdr = NEXTHDR_ROUTING; -- cgit v1.2.3 From 4522a70db7aa5e77526a4079628578599821b193 Mon Sep 17 00:00:00 2001 From: Jacob Wen Date: Wed, 30 Jan 2019 14:55:14 +0800 Subject: l2tp: fix reading optional fields of L2TPv3 Use pskb_may_pull() to make sure the optional fields are in skb linear parts, so we can safely read them later. It's easy to reproduce the issue with a net driver that supports paged skb data. Just create a L2TPv3 over IP tunnel and then generates some network traffic. Once reproduced, rx err in /sys/kernel/debug/l2tp/tunnels will increase. Changes in v4: 1. s/l2tp_v3_pull_opt/l2tp_v3_ensure_opt_in_linear/ 2. s/tunnel->version != L2TP_HDR_VER_2/tunnel->version == L2TP_HDR_VER_3/ 3. Add 'Fixes' in commit messages. Changes in v3: 1. To keep consistency, move the code out of l2tp_recv_common. 2. Use "net" instead of "net-next", since this is a bug fix. Changes in v2: 1. Only fix L2TPv3 to make code simple. To fix both L2TPv3 and L2TPv2, we'd better refactor l2tp_recv_common. It's complicated to do so. 2. Reloading pointers after pskb_may_pull Fixes: f7faffa3ff8e ("l2tp: Add L2TPv3 protocol support") Fixes: 0d76751fad77 ("l2tp: Add L2TPv3 IP encapsulation (no UDP) support") Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6") Signed-off-by: Jacob Wen Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++++ net/l2tp/l2tp_core.h | 20 ++++++++++++++++++++ net/l2tp/l2tp_ip.c | 3 +++ net/l2tp/l2tp_ip6.c | 3 +++ 4 files changed, 30 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 26f1d435696a..dd5ba0c11ab3 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -884,6 +884,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) goto error; } + if (tunnel->version == L2TP_HDR_VER_3 && + l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto error; + l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 9c9afe94d389..b2ce90260c35 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -301,6 +301,26 @@ static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel) } #endif +static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb, + unsigned char **ptr, unsigned char **optr) +{ + int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session); + + if (opt_len > 0) { + int off = *ptr - *optr; + + if (!pskb_may_pull(skb, off + opt_len)) + return -1; + + if (skb->data != *optr) { + *optr = skb->data; + *ptr = skb->data + off; + } + } + + return 0; +} + #define l2tp_printk(ptr, type, func, fmt, ...) \ do { \ if (((ptr)->debug) & (type)) \ diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 35f6f86d4dcc..d4c60523c549 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard_sess; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 237f1a4a0b0c..0ae6899edac0 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard_sess; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); l2tp_session_dec_refcount(session); -- cgit v1.2.3 From 91c524708de6207f59dd3512518d8a1c7b434ee3 Mon Sep 17 00:00:00 2001 From: Jacob Wen Date: Thu, 31 Jan 2019 15:18:56 +0800 Subject: l2tp: copy 4 more bytes to linear part if necessary The size of L2TPv2 header with all optional fields is 14 bytes. l2tp_udp_recv_core only moves 10 bytes to the linear part of a skb. This may lead to l2tp_recv_common read data outside of a skb. This patch make sure that there is at least 14 bytes in the linear part of a skb to meet the maximum need of l2tp_udp_recv_core and l2tp_recv_common. The minimum size of both PPP HDLC-like frame and Ethernet frame is larger than 14 bytes, so we are safe to do so. Also remove L2TP_HDR_SIZE_NOSEQ, it is unused now. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Suggested-by: Guillaume Nault Signed-off-by: Jacob Wen Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index dd5ba0c11ab3..fed6becc5daf 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -83,8 +83,7 @@ #define L2TP_SLFLAG_S 0x40000000 #define L2TP_SL_SEQ_MASK 0x00ffffff -#define L2TP_HDR_SIZE_SEQ 10 -#define L2TP_HDR_SIZE_NOSEQ 6 +#define L2TP_HDR_SIZE_MAX 14 /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 @@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) __skb_pull(skb, sizeof(struct udphdr)); /* Short packet? */ - if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) { + if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) { l2tp_info(tunnel, L2TP_MSG_DATA, "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); -- cgit v1.2.3 From 6fa19f5637a6c22bc0999596bcc83bdcac8a4fa6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Jan 2019 08:47:10 -0800 Subject: rds: fix refcount bug in rds_sock_addref syzbot was able to catch a bug in rds [1] The issue here is that the socket might be found in a hash table but that its refcount has already be set to 0 by another cpu. We need to use refcount_inc_not_zero() to be safe here. [1] refcount_t: increment on 0; use-after-free. WARNING: CPU: 1 PID: 23129 at lib/refcount.c:153 refcount_inc_checked lib/refcount.c:153 [inline] WARNING: CPU: 1 PID: 23129 at lib/refcount.c:153 refcount_inc_checked+0x61/0x70 lib/refcount.c:151 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 23129 Comm: syz-executor3 Not tainted 5.0.0-rc4+ #53 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1db/0x2d0 lib/dump_stack.c:113 panic+0x2cb/0x65c kernel/panic.c:214 __warn.cold+0x20/0x48 kernel/panic.c:571 report_bug+0x263/0x2b0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:178 [inline] fixup_bug arch/x86/kernel/traps.c:173 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:271 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:290 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:973 RIP: 0010:refcount_inc_checked lib/refcount.c:153 [inline] RIP: 0010:refcount_inc_checked+0x61/0x70 lib/refcount.c:151 Code: 1d 51 63 c8 06 31 ff 89 de e8 eb 1b f2 fd 84 db 75 dd e8 a2 1a f2 fd 48 c7 c7 60 9f 81 88 c6 05 31 63 c8 06 01 e8 af 65 bb fd <0f> 0b eb c1 90 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 41 54 49 RSP: 0018:ffff8880a0cbf1e8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc90006113000 RDX: 000000000001047d RSI: ffffffff81685776 RDI: 0000000000000005 RBP: ffff8880a0cbf1f8 R08: ffff888097c9e100 R09: ffffed1015ce5021 R10: ffffed1015ce5020 R11: ffff8880ae728107 R12: ffff8880723c20c0 R13: ffff8880723c24b0 R14: dffffc0000000000 R15: ffffed1014197e64 sock_hold include/net/sock.h:647 [inline] rds_sock_addref+0x19/0x20 net/rds/af_rds.c:675 rds_find_bound+0x97c/0x1080 net/rds/bind.c:82 rds_recv_incoming+0x3be/0x1430 net/rds/recv.c:362 rds_loop_xmit+0xf3/0x2a0 net/rds/loop.c:96 rds_send_xmit+0x1355/0x2a10 net/rds/send.c:355 rds_sendmsg+0x323c/0x44e0 net/rds/send.c:1368 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:631 __sys_sendto+0x387/0x5f0 net/socket.c:1788 __do_sys_sendto net/socket.c:1800 [inline] __se_sys_sendto net/socket.c:1796 [inline] __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1796 do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458089 Code: 6d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fc266df8c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 0000000000458089 RDX: 0000000000000000 RSI: 00000000204b3fff RDI: 0000000000000005 RBP: 000000000073bf00 R08: 00000000202b4000 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc266df96d4 R13: 00000000004c56e4 R14: 00000000004d94a8 R15: 00000000ffffffff Fixes: cc4dfb7f70a3 ("rds: fix two RCU related problems") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Sowmini Varadhan Cc: Santosh Shilimkar Cc: rds-devel@oss.oracle.com Cc: Cong Wang Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/bind.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rds/bind.c b/net/rds/bind.c index 762d2c6788a3..17c9d9f0c848 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -78,10 +78,10 @@ struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port, __rds_create_bind_key(key, addr, port, scope_id); rcu_read_lock(); rs = rhashtable_lookup(&bind_hash_table, key, ht_parms); - if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) - rds_sock_addref(rs); - else + if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) || + !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt))) rs = NULL; + rcu_read_unlock(); rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr, -- cgit v1.2.3 From 9d0f50b80222dc273e67e4e14410fcfa4130a90c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 29 Jan 2019 11:10:57 +0100 Subject: mac80211: ensure that mgmt tx skbs have tailroom for encryption Some drivers use IEEE80211_KEY_FLAG_SW_MGMT_TX to indicate that management frames need to be software encrypted. Since normal data packets are still encrypted by the hardware, crypto_tx_tailroom_needed_cnt gets decremented after key upload to hw. This can lead to passing skbs to ccmp_encrypt_skb, which don't have the necessary tailroom for software encryption. Change the code to add tailroom for encrypted management packets, even if crypto_tx_tailroom_needed_cnt is 0. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f170d6c6629a..928f13a208b0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1938,9 +1938,16 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, int head_need, bool may_encrypt) { struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *hdr; + bool enc_tailroom; int tail_need = 0; - if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) { + hdr = (struct ieee80211_hdr *) skb->data; + enc_tailroom = may_encrypt && + (sdata->crypto_tx_tailroom_needed_cnt || + ieee80211_is_mgmt(hdr->frame_control)); + + if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; tail_need -= skb_tailroom(skb); tail_need = max_t(int, tail_need, 0); @@ -1948,8 +1955,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, if (skb_cloned(skb) && (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) || - !skb_clone_writable(skb, ETH_HLEN) || - (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) + !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); -- cgit v1.2.3 From e005bd7ddea06784c1eb91ac5bb6b171a94f3b05 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 1 Feb 2019 11:09:54 +0100 Subject: cfg80211: call disconnect_wk when AP stops Since we now prevent regulatory restore during STA disconnect if concurrent AP interfaces are active, we need to reschedule this check when the AP state changes. This fixes never doing a restore when an AP is the last interface to stop. Or to put it another way: we need to re-check after anything we check here changes. Cc: stable@vger.kernel.org Fixes: 113f3aaa81bd ("cfg80211: Prevent regulatory restore during STA disconnect in concurrent interfaces") Signed-off-by: Johannes Berg --- net/wireless/ap.c | 2 ++ net/wireless/core.h | 2 ++ net/wireless/sme.c | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 882d97bdc6bf..550ac9d827fe 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -41,6 +41,8 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, cfg80211_sched_dfs_chan_update(rdev); } + schedule_work(&cfg80211_disconnect_work); + return err; } diff --git a/net/wireless/core.h b/net/wireless/core.h index c5d6f3418601..f6b40563dc63 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -445,6 +445,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, u32 center_freq_khz, u32 bw_khz); +extern struct work_struct cfg80211_disconnect_work; + /** * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable * @wiphy: the wiphy to validate against diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f741d8376a46..7d34cb884840 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -667,7 +667,7 @@ static void disconnect_work(struct work_struct *work) rtnl_unlock(); } -static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); +DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); /* -- cgit v1.2.3 From ba59fb0273076637f0add4311faa990a5eec27c0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Feb 2019 15:15:22 +0100 Subject: sctp: walk the list of asoc safely In sctp_sendmesg(), when walking the list of endpoint associations, the association can be dropped from the list, making the list corrupt. Properly handle this by using list_for_each_entry_safe() Fixes: 4910280503f3 ("sctp: add support for snd flag SCTP_SENDALL process in sendmsg") Reported-by: Secunia Research Tested-by: Secunia Research Signed-off-by: Greg Kroah-Hartman Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f93c3cf9e567..65d6d04546ae 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2027,7 +2027,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_transport *transport = NULL; struct sctp_sndrcvinfo _sinfo, *sinfo; - struct sctp_association *asoc; + struct sctp_association *asoc, *tmp; struct sctp_cmsgs cmsgs; union sctp_addr *daddr; bool new = false; @@ -2053,7 +2053,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) /* SCTP_SENDALL process */ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) { - list_for_each_entry(asoc, &ep->asocs, asocs) { + list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) { err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len); if (err == 0) -- cgit v1.2.3 From 14d22d4d61e40623a7c5816728bfe55c322e779a Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 30 Jan 2019 18:51:00 +0100 Subject: net/smc: fix another sizeof to int comparison Comparing an int to a size, which is unsigned, causes the int to become unsigned, giving the wrong result. kernel_sendmsg can return a negative error code. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_clc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 776e9dfc915d..d53fd588d1f5 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -378,7 +378,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) vec.iov_len = sizeof(struct smc_clc_msg_decline); len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(struct smc_clc_msg_decline)); - if (len < sizeof(struct smc_clc_msg_decline)) + if (len < 0 || len < sizeof(struct smc_clc_msg_decline)) len = -EPROTO; return len > 0 ? 0 : len; } -- cgit v1.2.3 From ca8dc1334a71d6081b09e18d55198a27e28fd44b Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Wed, 30 Jan 2019 18:51:01 +0100 Subject: net/smc: allow 16 byte pnetids in netlink policy Currently, users can only send pnetids with a maximum length of 15 bytes over the SMC netlink interface although the maximum pnetid length is 16 bytes. This patch changes the SMC netlink policy to accept 16 byte pnetids. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_pnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 7cb3e4f07c10..632c3109dee5 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -27,7 +27,7 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { .type = NLA_NUL_STRING, - .len = SMC_MAX_PNETID_LEN - 1 + .len = SMC_MAX_PNETID_LEN }, [SMC_PNETID_ETHNAME] = { .type = NLA_NUL_STRING, -- cgit v1.2.3 From 77f838ace755d2f466536c44dac6c856f62cd901 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:02 +0100 Subject: net/smc: prevent races between smc_lgr_terminate() and smc_conn_free() To prevent races between smc_lgr_terminate() and smc_conn_free() add an extra check of the lgr field before accessing it, and cancel a delayed free_work when a new smc connection is created. This fixes the problem that free_work cleared the lgr variable but smc_lgr_terminate() or smc_conn_free() still access it in parallel. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 35c1cdc93e1c..097c798983ca 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -128,6 +128,8 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!lgr) + return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { __smc_lgr_unregister_conn(conn); @@ -628,6 +630,8 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, local_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; smc_lgr_register_conn(conn); /* add smc conn to lgr */ + if (delayed_work_pending(&lgr->free_work)) + cancel_delayed_work(&lgr->free_work); write_unlock_bh(&lgr->conns_lock); break; } -- cgit v1.2.3 From 6889b36da78a21a312d8b462c1fa25a03c2ff192 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:03 +0100 Subject: net/smc: don't wait for send buffer space when data was already sent When there is no more send buffer space and at least 1 byte was already sent then return to user space. The wait is only done when no data was sent by the sendmsg() call. This fixes smc_tx_sendmsg() which tried to always send all user data and started to wait for free send buffer space when needed. During this wait the user space program was blocked in the sendmsg() call and hence not able to receive incoming data. When both sides were in such a situation then the connection stalled forever. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index d8366ed51757..f99951f3f7fd 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -165,12 +165,11 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) conn->local_tx_ctrl.prod_flags.urg_data_pending = 1; if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) { + if (send_done) + return send_done; rc = smc_tx_wait(smc, msg->msg_flags); - if (rc) { - if (send_done) - return send_done; + if (rc) goto out_err; - } continue; } -- cgit v1.2.3 From 51c5aba3b672c4285fca052817f34b22dc79dda7 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:04 +0100 Subject: net/smc: recvmsg and splice_read should return 0 after shutdown When a socket was connected and is now shut down for read, return 0 to indicate end of data in recvmsg and splice_read (like TCP) and do not return ENOTCONN. This behavior is required by the socket api. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c4e56602e0c6..b04a813fc865 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1505,6 +1505,11 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, smc = smc_sk(sk); lock_sock(sk); + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) || (sk->sk_state == SMC_CLOSED)) @@ -1840,7 +1845,11 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, smc = smc_sk(sk); lock_sock(sk); - + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if (sk->sk_state == SMC_INIT || sk->sk_state == SMC_LISTEN || sk->sk_state == SMC_CLOSED) -- cgit v1.2.3 From 33f3fcc290671590821ff3c0c9396db1ec9b7d4c Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:05 +0100 Subject: net/smc: do not wait under send_lock smc_cdc_get_free_slot() might wait for free transfer buffers when using SMC-R. This wait should not be done under the send_lock, which is a spin_lock. This fixes a cpu loop in parallel threads waiting for the send_lock. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index f99951f3f7fd..36af3de731b9 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -488,25 +488,23 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) struct smc_wr_buf *wr_buf; int rc; - spin_lock_bh(&conn->send_lock); rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); if (rc < 0) { if (rc == -EBUSY) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); - if (smc->sk.sk_err == ECONNABORTED) { - rc = sock_error(&smc->sk); - goto out_unlock; - } + if (smc->sk.sk_err == ECONNABORTED) + return sock_error(&smc->sk); rc = 0; if (conn->alert_token_local) /* connection healthy */ mod_delayed_work(system_wq, &conn->tx_work, SMC_TX_WORK_DELAY); } - goto out_unlock; + return rc; } + spin_lock_bh(&conn->send_lock); if (!conn->local_tx_ctrl.prod_flags.urg_data_present) { rc = smc_tx_rdma_writes(conn); if (rc) { -- cgit v1.2.3 From 2dee25af42f99b476d5916aeac5c994e4dcc910b Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:06 +0100 Subject: net/smc: call smc_cdc_msg_send() under send_lock Call smc_cdc_msg_send() under the connection send_lock to make sure all send operations for one connection are serialized. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index db83332ac1c8..1c5333d494e9 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -125,7 +125,10 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) if (rc) return rc; - return smc_cdc_msg_send(conn, wr_buf, pend); + spin_lock_bh(&conn->send_lock); + rc = smc_cdc_msg_send(conn, wr_buf, pend); + spin_unlock_bh(&conn->send_lock); + return rc; } int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) -- cgit v1.2.3 From e5f3aa04dbfd92154f21edfeb7a1676a45918928 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:07 +0100 Subject: net/smc: use device link provided in qp_context The device field of the IB event structure does not always point to the SMC IB device. Load the pointer from the qp_context which is always provided to smc_ib_qp_event_handler() in the priv field. And for qp events the affected port is given in the qp structure of the ibevent, derive it from there. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_ib.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e519ef29c0ff..76487a16934e 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -289,8 +289,8 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) { - struct smc_ib_device *smcibdev = - (struct smc_ib_device *)ibevent->device; + struct smc_link *lnk = (struct smc_link *)priv; + struct smc_ib_device *smcibdev = lnk->smcibdev; u8 port_idx; switch (ibevent->event) { @@ -298,7 +298,7 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) case IB_EVENT_GID_CHANGE: case IB_EVENT_PORT_ERR: case IB_EVENT_QP_ACCESS_ERR: - port_idx = ibevent->element.port_num - 1; + port_idx = ibevent->element.qp->port - 1; set_bit(port_idx, &smcibdev->port_event_mask); schedule_work(&smcibdev->port_event_work); break; -- cgit v1.2.3 From 46ad02229d6b4ee276eb295ca08f039993f323c8 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:08 +0100 Subject: net/smc: fix use of variable in cleared area Do not use pend->idx as index for the arrays because its value is located in the cleared area. Use the existing local variable instead. Without this fix the wrong area might be cleared. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_wr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index c2694750a6a8..1dc88c32d6bb 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -218,10 +218,10 @@ int smc_wr_tx_put_slot(struct smc_link *link, u32 idx = pend->idx; /* clear the full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[pend->idx], 0, - sizeof(link->wr_tx_pends[pend->idx])); - memset(&link->wr_tx_bufs[pend->idx], 0, - sizeof(link->wr_tx_bufs[pend->idx])); + memset(&link->wr_tx_pends[idx], 0, + sizeof(link->wr_tx_pends[idx])); + memset(&link->wr_tx_bufs[idx], 0, + sizeof(link->wr_tx_bufs[idx])); test_and_clear_bit(idx, link->wr_tx_mask); return 1; } -- cgit v1.2.3 From 9b1f19d810e92d6cdc68455fbc22d9f961a58ce1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Jan 2019 11:39:41 -0800 Subject: dccp: fool proof ccid_hc_[rt]x_parse_options() Similarly to commit 276bdb82dedb ("dccp: check ccid before dereferencing") it is wise to test for a NULL ccid. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.0.0-rc3+ #37 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:ccid_hc_tx_parse_options net/dccp/ccid.h:205 [inline] RIP: 0010:dccp_parse_options+0x8d9/0x12b0 net/dccp/options.c:233 Code: c5 0f b6 75 b3 80 38 00 0f 85 d6 08 00 00 48 b9 00 00 00 00 00 fc ff df 48 8b 45 b8 4c 8b b8 f8 07 00 00 4c 89 f8 48 c1 e8 03 <80> 3c 08 00 0f 85 95 08 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b kobject: 'loop5' (0000000080f78fc1): kobject_uevent_env RSP: 0018:ffff8880a94df0b8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8880858ac723 RCX: dffffc0000000000 RDX: 0000000000000100 RSI: 0000000000000007 RDI: 0000000000000001 RBP: ffff8880a94df140 R08: 0000000000000001 R09: ffff888061b83a80 R10: ffffed100c370752 R11: ffff888061b83a97 R12: 0000000000000026 R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f0defa33518 CR3: 000000008db5e000 CR4: 00000000001406e0 kobject: 'loop5' (0000000080f78fc1): fill_kobj_path: path = '/devices/virtual/block/loop5' DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: dccp_rcv_state_process+0x2b6/0x1af6 net/dccp/input.c:654 dccp_v4_do_rcv+0x100/0x190 net/dccp/ipv4.c:688 sk_backlog_rcv include/net/sock.h:936 [inline] __sk_receive_skb+0x3a9/0xea0 net/core/sock.c:473 dccp_v4_rcv+0x10cb/0x1f80 net/dccp/ipv4.c:880 ip_protocol_deliver_rcu+0xb6/0xa20 net/ipv4/ip_input.c:208 ip_local_deliver_finish+0x23b/0x390 net/ipv4/ip_input.c:234 NF_HOOK include/linux/netfilter.h:289 [inline] NF_HOOK include/linux/netfilter.h:283 [inline] ip_local_deliver+0x1f0/0x740 net/ipv4/ip_input.c:255 dst_input include/net/dst.h:450 [inline] ip_rcv_finish+0x1f4/0x2f0 net/ipv4/ip_input.c:414 NF_HOOK include/linux/netfilter.h:289 [inline] NF_HOOK include/linux/netfilter.h:283 [inline] ip_rcv+0xed/0x620 net/ipv4/ip_input.c:524 __netif_receive_skb_one_core+0x160/0x210 net/core/dev.c:4973 __netif_receive_skb+0x2c/0x1c0 net/core/dev.c:5083 process_backlog+0x206/0x750 net/core/dev.c:5923 napi_poll net/core/dev.c:6346 [inline] net_rx_action+0x76d/0x1930 net/core/dev.c:6412 __do_softirq+0x30b/0xb11 kernel/softirq.c:292 run_ksoftirqd kernel/softirq.c:654 [inline] run_ksoftirqd+0x8e/0x110 kernel/softirq.c:646 smpboot_thread_fn+0x6ab/0xa10 kernel/smpboot.c:164 kthread+0x357/0x430 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Modules linked in: ---[ end trace 58a0ba03bea2c376 ]--- RIP: 0010:ccid_hc_tx_parse_options net/dccp/ccid.h:205 [inline] RIP: 0010:dccp_parse_options+0x8d9/0x12b0 net/dccp/options.c:233 Code: c5 0f b6 75 b3 80 38 00 0f 85 d6 08 00 00 48 b9 00 00 00 00 00 fc ff df 48 8b 45 b8 4c 8b b8 f8 07 00 00 4c 89 f8 48 c1 e8 03 <80> 3c 08 00 0f 85 95 08 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b RSP: 0018:ffff8880a94df0b8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8880858ac723 RCX: dffffc0000000000 RDX: 0000000000000100 RSI: 0000000000000007 RDI: 0000000000000001 RBP: ffff8880a94df140 R08: 0000000000000001 R09: ffff888061b83a80 R10: ffffed100c370752 R11: ffff888061b83a97 R12: 0000000000000026 R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f0defa33518 CR3: 0000000009871000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/ccid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 6eb837a47b5c..baaaeb2b2c42 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -202,7 +202,7 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len); } @@ -214,7 +214,7 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len); } -- cgit v1.2.3 From 22b5c0b63f32568e130fa2df4ba23efce3eb495b Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 1 Feb 2019 12:42:06 +0100 Subject: vsock/virtio: fix kernel panic after device hot-unplug virtio_vsock_remove() invokes the vsock_core_exit() also if there are opened sockets for the AF_VSOCK protocol family. In this way the vsock "transport" pointer is set to NULL, triggering the kernel panic at the first socket activity. This patch move the vsock_core_init()/vsock_core_exit() in the virtio_vsock respectively in module_init and module_exit functions, that cannot be invoked until there are open sockets. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1609699 Reported-by: Yan Fu Signed-off-by: Stefano Garzarella Acked-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 5d3cce9e8744..9dae54698737 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void) { struct virtio_vsock *vsock = virtio_vsock_get(); + if (!vsock) + return VMADDR_CID_ANY; + return vsock->guest_cid; } @@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) virtio_vsock_update_guest_cid(vsock); - ret = vsock_core_init(&virtio_transport.transport); - if (ret < 0) - goto out_vqs; - vsock->rx_buf_nr = 0; vsock->rx_buf_max_nr = 0; atomic_set(&vsock->queued_replies, 0); @@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) mutex_unlock(&the_virtio_vsock_mutex); return 0; -out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); out: kfree(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -669,7 +666,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev) mutex_lock(&the_virtio_vsock_mutex); the_virtio_vsock = NULL; - vsock_core_exit(); mutex_unlock(&the_virtio_vsock_mutex); vdev->config->del_vqs(vdev); @@ -702,14 +698,28 @@ static int __init virtio_vsock_init(void) virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); if (!virtio_vsock_workqueue) return -ENOMEM; + ret = register_virtio_driver(&virtio_vsock_driver); if (ret) - destroy_workqueue(virtio_vsock_workqueue); + goto out_wq; + + ret = vsock_core_init(&virtio_transport.transport); + if (ret) + goto out_vdr; + + return 0; + +out_vdr: + unregister_virtio_driver(&virtio_vsock_driver); +out_wq: + destroy_workqueue(virtio_vsock_workqueue); return ret; + } static void __exit virtio_vsock_exit(void) { + vsock_core_exit(); unregister_virtio_driver(&virtio_vsock_driver); destroy_workqueue(virtio_vsock_workqueue); } -- cgit v1.2.3 From 85965487abc540368393a15491e6e7fcd230039d Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 1 Feb 2019 12:42:07 +0100 Subject: vsock/virtio: reset connected sockets on device removal When the virtio transport device disappear, we should reset all connected sockets in order to inform the users. Signed-off-by: Stefano Garzarella Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 9dae54698737..15eb5d3d4750 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -634,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev) flush_work(&vsock->event_work); flush_work(&vsock->send_pkt_work); + /* Reset all connected sockets when the device disappear */ + vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vdev->config->reset(vdev); mutex_lock(&vsock->rx_lock); -- cgit v1.2.3 From cfe4bd7a257f6d6f81d3458d8c9d9ec4957539e6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 4 Feb 2019 03:27:58 +0800 Subject: sctp: check and update stream->out_curr when allocating stream_out Now when using stream reconfig to add out streams, stream->out will get re-allocated, and all old streams' information will be copied to the new ones and the old ones will be freed. So without stream->out_curr updated, next time when trying to send from stream->out_cur