From 9e8acd9c44a0dd52b2922eeb82398c04e356c058 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 9 Oct 2019 10:31:24 +0200 Subject: bpf: lwtunnel: Fix reroute supplying invalid dst The dst in bpf_input() has lwtstate field set. As it is of the LWTUNNEL_ENCAP_BPF type, lwtstate->data is struct bpf_lwt. When the bpf program returns BPF_LWT_REROUTE, ip_route_input_noref is directly called on this skb. This causes invalid memory access, as ip_route_input_slow calls skb_tunnel_info(skb) that expects the dst->lwstate->data to be struct ip_tunnel_info. This results to struct bpf_lwt being accessed as struct ip_tunnel_info. Drop the dst before calling the IP route input functions (both for IPv4 and IPv6). Reported by KASAN. Fixes: 3bd0b15281af ("bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c") Signed-off-by: Jiri Benc Signed-off-by: Alexei Starovoitov Acked-by: Peter Oskolkov Link: https://lore.kernel.org/bpf/111664d58fe4e9dd9c8014bb3d0b2dab93086a9e.1570609794.git.jbenc@redhat.com --- net/core/lwt_bpf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index f93785e5833c..74cfb8b5ab33 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -88,11 +88,16 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb) int err = -EINVAL; if (skb->protocol == htons(ETH_P_IP)) { + struct net_device *dev = skb_dst(skb)->dev; struct iphdr *iph = ip_hdr(skb); + dev_hold(dev); + skb_dst_drop(skb); err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb_dst(skb)->dev); + iph->tos, dev); + dev_put(dev); } else if (skb->protocol == htons(ETH_P_IPV6)) { + skb_dst_drop(skb); err = ipv6_stub->ipv6_route_input(skb); } else { err = -EAFNOSUPPORT; -- cgit v1.2.3 From 55667441c84fa5e0911a0aac44fb059c15ba6da2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Oct 2019 07:57:46 -0700 Subject: net/flow_dissector: switch to siphash UDP IPv6 packets auto flowlabels are using a 32bit secret (static u32 hashrnd in net/core/flow_dissector.c) and apply jhash() over fields known by the receivers. Attackers can easily infer the 32bit secret and use this information to identify a device and/or user, since this 32bit secret is only set at boot time. Really, using jhash() to generate cookies sent on the wire is a serious security concern. Trying to change the rol32(hash, 16) in ip6_make_flowlabel() would be a dead end. Trying to periodically change the secret (like in sch_sfq.c) could change paths taken in the network for long lived flows. Let's switch to siphash, as we did in commit df453700e8d8 ("inet: switch IP ID generator to siphash") Using a cryptographically strong pseudo random function will solve this privacy issue and more generally remove other weak points in the stack. Packet schedulers using skb_get_hash_perturb() benefit from this change. Fixes: b56774163f99 ("ipv6: Enable auto flow labels by default") Fixes: 42240901f7c4 ("ipv6: Implement different admin modes for automatic flow labels") Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel") Fixes: cb1ce2ef387b ("ipv6: Implement automatic flow label generation on transmit") Signed-off-by: Eric Dumazet Reported-by: Jonathan Berger Reported-by: Amit Klein Reported-by: Benny Pinkas Cc: Tom Herbert Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) (limited to 'net/core') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 7c09d87d3269..68eda10d0680 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1350,30 +1350,21 @@ out_bad: } EXPORT_SYMBOL(__skb_flow_dissect); -static u32 hashrnd __read_mostly; +static siphash_key_t hashrnd __read_mostly; static __always_inline void __flow_hash_secret_init(void) { net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static __always_inline u32 __flow_hash_words(const u32 *words, u32 length, - u32 keyval) +static const void *flow_keys_hash_start(const struct flow_keys *flow) { - return jhash2(words, length, keyval); -} - -static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow) -{ - const void *p = flow; - - BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32)); - return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET); + BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT); + return &flow->FLOW_KEYS_HASH_START_FIELD; } static inline size_t flow_keys_hash_length(const struct flow_keys *flow) { size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); - BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); BUILD_BUG_ON(offsetof(typeof(*flow), addrs) != sizeof(*flow) - sizeof(flow->addrs)); @@ -1388,7 +1379,7 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow) diff -= sizeof(flow->addrs.tipckey); break; } - return (sizeof(*flow) - diff) / sizeof(u32); + return sizeof(*flow) - diff; } __be32 flow_get_u32_src(const struct flow_keys *flow) @@ -1454,14 +1445,15 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) } } -static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys, + const siphash_key_t *keyval) { u32 hash; __flow_hash_consistentify(keys); - hash = __flow_hash_words(flow_keys_hash_start(keys), - flow_keys_hash_length(keys), keyval); + hash = siphash(flow_keys_hash_start(keys), + flow_keys_hash_length(keys), keyval); if (!hash) hash = 1; @@ -1471,12 +1463,13 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) u32 flow_hash_from_keys(struct flow_keys *keys) { __flow_hash_secret_init(); - return __flow_hash_from_keys(keys, hashrnd); + return __flow_hash_from_keys(keys, &hashrnd); } EXPORT_SYMBOL(flow_hash_from_keys); static inline u32 ___skb_get_hash(const struct sk_buff *skb, - struct flow_keys *keys, u32 keyval) + struct flow_keys *keys, + const siphash_key_t *keyval) { skb_flow_dissect_flow_keys(skb, keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); @@ -1524,7 +1517,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) &keys, NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); - return __flow_hash_from_keys(&keys, hashrnd); + return __flow_hash_from_keys(&keys, &hashrnd); } EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); @@ -1544,13 +1537,14 @@ void __skb_get_hash(struct sk_buff *skb) __flow_hash_secret_init(); - hash = ___skb_get_hash(skb, &keys, hashrnd); + hash = ___skb_get_hash(skb, &keys, &hashrnd); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } EXPORT_SYMBOL(__skb_get_hash); -__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) +__u32 skb_get_hash_perturb(const struct sk_buff *skb, + const siphash_key_t *perturb) { struct flow_keys keys; -- cgit v1.2.3 From 82ecff655e7968151b0047f1b5de03b249e5c1c4 Mon Sep 17 00:00:00 2001 From: Takeshi Misawa Date: Sat, 19 Oct 2019 15:34:43 +0900 Subject: keys: Fix memory leak in copy_net_ns If copy_net_ns() failed after net_alloc(), net->key_domain is leaked. Fix this, by freeing key_domain in error path. syzbot report: BUG: memory leak unreferenced object 0xffff8881175007e0 (size 32): comm "syz-executor902", pid 7069, jiffies 4294944350 (age 28.400s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000a83ed741>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<00000000a83ed741>] slab_post_alloc_hook mm/slab.h:439 [inline] [<00000000a83ed741>] slab_alloc mm/slab.c:3326 [inline] [<00000000a83ed741>] kmem_cache_alloc_trace+0x13d/0x280 mm/slab.c:3553 [<0000000059fc92b9>] kmalloc include/linux/slab.h:547 [inline] [<0000000059fc92b9>] kzalloc include/linux/slab.h:742 [inline] [<0000000059fc92b9>] net_alloc net/core/net_namespace.c:398 [inline] [<0000000059fc92b9>] copy_net_ns+0xb2/0x220 net/core/net_namespace.c:445 [<00000000a9d74bbc>] create_new_namespaces+0x141/0x2a0 kernel/nsproxy.c:103 [<000000008047d645>] unshare_nsproxy_namespaces+0x7f/0x100 kernel/nsproxy.c:202 [<000000005993ea6e>] ksys_unshare+0x236/0x490 kernel/fork.c:2674 [<0000000019417e75>] __do_sys_unshare kernel/fork.c:2742 [inline] [<0000000019417e75>] __se_sys_unshare kernel/fork.c:2740 [inline] [<0000000019417e75>] __x64_sys_unshare+0x16/0x20 kernel/fork.c:2740 [<00000000f4c5f2c8>] do_syscall_64+0x76/0x1a0 arch/x86/entry/common.c:296 [<0000000038550184>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 syzbot also reported other leak in copy_net_ns -> setup_net. This problem is already fixed by cf47a0b882a4e5f6b34c7949d7b293e9287f1972. Fixes: 9b242610514f ("keys: Network namespace domain tag") Reported-and-tested-by: syzbot+3b3296d032353c33184b@syzkaller.appspotmail.com Signed-off-by: Takeshi Misawa Signed-off-by: David S. Miller --- net/core/net_namespace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6d3e4821b02d..5a4ae0845bac 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -479,6 +479,7 @@ struct net *copy_net_ns(unsigned long flags, if (rv < 0) { put_userns: + key_remove_domain(net->key_domain); put_user_ns(user_ns); net_drop_ns(net); dec_ucounts: -- cgit v1.2.3 From 5343da4c17429efaa5fb1594ea96aee1a283e694 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:50 +0000 Subject: net: core: limit nested device depth Current code doesn't limit the number of nested devices. Nested devices would be handled recursively and this needs huge stack memory. So, unlimited nested devices could make stack overflow. This patch adds upper_level and lower_level, they are common variables and represent maximum lower/upper depth. When upper/lower device is attached or dettached, {lower/upper}_level are updated. and if maximum depth is bigger than 8, attach routine fails and returns -EMLINK. In addition, this patch converts recursive routine of netdev_walk_all_{lower/upper} to iterator routine. Test commands: ip link add dummy0 type dummy ip link add link dummy0 name vlan1 type vlan id 1 ip link set vlan1 up for i in {2..55} do let A=$i-1 ip link add vlan$i link vlan$A type vlan id $i done ip link del dummy0 Splat looks like: [ 155.513226][ T908] BUG: KASAN: use-after-free in __unwind_start+0x71/0x850 [ 155.514162][ T908] Write of size 88 at addr ffff8880608a6cc0 by task ip/908 [ 155.515048][ T908] [ 155.515333][ T908] CPU: 0 PID: 908 Comm: ip Not tainted 5.4.0-rc3+ #96 [ 155.516147][ T908] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 155.517233][ T908] Call Trace: [ 155.517627][ T908] [ 155.517918][ T908] Allocated by task 0: [ 155.518412][ T908] (stack is not available) [ 155.518955][ T908] [ 155.519228][ T908] Freed by task 0: [ 155.519885][ T908] (stack is not available) [ 155.520452][ T908] [ 155.520729][ T908] The buggy address belongs to the object at ffff8880608a6ac0 [ 155.520729][ T908] which belongs to the cache names_cache of size 4096 [ 155.522387][ T908] The buggy address is located 512 bytes inside of [ 155.522387][ T908] 4096-byte region [ffff8880608a6ac0, ffff8880608a7ac0) [ 155.523920][ T908] The buggy address belongs to the page: [ 155.524552][ T908] page:ffffea0001822800 refcount:1 mapcount:0 mapping:ffff88806c657cc0 index:0x0 compound_mapcount:0 [ 155.525836][ T908] flags: 0x100000000010200(slab|head) [ 155.526445][ T908] raw: 0100000000010200 ffffea0001813808 ffffea0001a26c08 ffff88806c657cc0 [ 155.527424][ T908] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 155.528429][ T908] page dumped because: kasan: bad access detected [ 155.529158][ T908] [ 155.529410][ T908] Memory state around the buggy address: [ 155.530060][ T908] ffff8880608a6b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 155.530971][ T908] ffff8880608a6c00: fb fb fb fb fb f1 f1 f1 f1 00 f2 f2 f2 f3 f3 f3 [ 155.531889][ T908] >ffff8880608a6c80: f3 fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 155.532806][ T908] ^ [ 155.533509][ T908] ffff8880608a6d00: fb fb fb fb fb fb fb fb fb f1 f1 f1 f1 00 00 00 [ 155.534436][ T908] ffff8880608a6d80: f2 f3 f3 f3 f3 fb fb fb 00 00 00 00 00 00 00 00 [ ... ] Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/core/dev.c | 272 +++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 227 insertions(+), 45 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index bf3ed413abaf..ab0edfc4a422 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -146,6 +146,7 @@ #include "net-sysfs.h" #define MAX_GRO_SKBS 8 +#define MAX_NEST_DEV 8 /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) @@ -6644,6 +6645,21 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); +static struct net_device *netdev_next_upper_dev(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *upper; + + upper = list_entry((*iter)->next, struct netdev_adjacent, list); + + if (&upper->list == &dev->adj_list.upper) + return NULL; + + *iter = &upper->list; + + return upper->dev; +} + static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, struct list_head **iter) { @@ -6661,28 +6677,93 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, return upper->dev; } +static int netdev_walk_all_upper_dev(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; + + now = dev; + iter = &dev->adj_list.upper; + + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + udev = netdev_next_upper_dev(now, &iter); + if (!udev) + break; + + next = udev; + niter = &udev->adj_list.upper; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; + } + + return 0; +} + int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, void *data), void *data) { - struct net_device *udev; - struct list_head *iter; - int ret; + struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; - for (iter = &dev->adj_list.upper, - udev = netdev_next_upper_dev_rcu(dev, &iter); - udev; - udev = netdev_next_upper_dev_rcu(dev, &iter)) { - /* first is the upper device itself */ - ret = fn(udev, data); - if (ret) - return ret; + now = dev; + iter = &dev->adj_list.upper; - /* then look at all of its upper devices */ - ret = netdev_walk_all_upper_dev_rcu(udev, fn, data); - if (ret) - return ret; + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + udev = netdev_next_upper_dev_rcu(now, &iter); + if (!udev) + break; + + next = udev; + niter = &udev->adj_list.upper; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; } return 0; @@ -6790,23 +6871,42 @@ int netdev_walk_all_lower_dev(struct net_device *dev, void *data), void *data) { - struct net_device *ldev; - struct list_head *iter; - int ret; + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; - for (iter = &dev->adj_list.lower, - ldev = netdev_next_lower_dev(dev, &iter); - ldev; - ldev = netdev_next_lower_dev(dev, &iter)) { - /* first is the lower device itself */ - ret = fn(ldev, data); - if (ret) - return ret; + now = dev; + iter = &dev->adj_list.lower; - /* then look at all of its lower devices */ - ret = netdev_walk_all_lower_dev(ldev, fn, data); - if (ret) - return ret; + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + ldev = netdev_next_lower_dev(now, &iter); + if (!ldev) + break; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; } return 0; @@ -6827,28 +6927,93 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, return lower->dev; } -int netdev_walk_all_lower_dev_rcu(struct net_device *dev, - int (*fn)(struct net_device *dev, - void *data), - void *data) +static u8 __netdev_upper_depth(struct net_device *dev) +{ + struct net_device *udev; + struct list_head *iter; + u8 max_depth = 0; + + for (iter = &dev->adj_list.upper, + udev = netdev_next_upper_dev(dev, &iter); + udev; + udev = netdev_next_upper_dev(dev, &iter)) { + if (max_depth < udev->upper_level) + max_depth = udev->upper_level; + } + + return max_depth; +} + +static u8 __netdev_lower_depth(struct net_device *dev) { struct net_device *ldev; struct list_head *iter; - int ret; + u8 max_depth = 0; for (iter = &dev->adj_list.lower, - ldev = netdev_next_lower_dev_rcu(dev, &iter); + ldev = netdev_next_lower_dev(dev, &iter); ldev; - ldev = netdev_next_lower_dev_rcu(dev, &iter)) { - /* first is the lower device itself */ - ret = fn(ldev, data); - if (ret) - return ret; + ldev = netdev_next_lower_dev(dev, &iter)) { + if (max_depth < ldev->lower_level) + max_depth = ldev->lower_level; + } - /* then look at all of its lower devices */ - ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data); - if (ret) - return ret; + return max_depth; +} + +static int __netdev_update_upper_level(struct net_device *dev, void *data) +{ + dev->upper_level = __netdev_upper_depth(dev) + 1; + return 0; +} + +static int __netdev_update_lower_level(struct net_device *dev, void *data) +{ + dev->lower_level = __netdev_lower_depth(dev) + 1; + return 0; +} + +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; + + now = dev; + iter = &dev->adj_list.lower; + + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + ldev = netdev_next_lower_dev_rcu(now, &iter); + if (!ldev) + break; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; } return 0; @@ -7105,6 +7270,9 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (netdev_has_upper_dev(upper_dev, dev)) return -EBUSY; + if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV) + return -EMLINK; + if (!master) { if (netdev_has_upper_dev(dev, upper_dev)) return -EEXIST; @@ -7131,6 +7299,12 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) goto rollback; + __netdev_update_upper_level(dev, NULL); + netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); + + __netdev_update_lower_level(upper_dev, NULL); + netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); + return 0; rollback: @@ -7213,6 +7387,12 @@ void netdev_upper_dev_unlink(struct net_device *dev, call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); + + __netdev_update_upper_level(dev, NULL); + netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); + + __netdev_update_lower_level(upper_dev, NULL); + netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -9212,6 +9392,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; + dev->upper_level = 1; + dev->lower_level = 1; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); -- cgit v1.2.3 From ab92d68fc22f9afab480153bd82a20f6e2533769 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:51 +0000 Subject: net: core: add generic lockdep keys Some interface types could be nested. (VLAN, BONDING, TEAM, MACSEC, MACVLAN, IPVLAN, VIRT_WIFI, VXLAN, etc..) These interface types should set lockdep class because, without lockdep class key, lockdep always warn about unexisting circular locking. In the current code, these interfaces have their own lockdep class keys and these manage itself. So that there are so many duplicate code around the /driver/net and /net/. This patch adds new generic lockdep keys and some helper functions for it. This patch does below changes. a) Add lockdep class keys in struct net_device - qdisc_running, xmit, addr_list, qdisc_busylock - these keys are used as dynamic lockdep key. b) When net_device is being allocated, lockdep keys are registered. - alloc_netdev_mqs() c) When net_device is being free'd llockdep keys are unregistered. - free_netdev() d) Add generic lockdep key helper function - netdev_register_lockdep_key() - netdev_unregister_lockdep_key() - netdev_update_lockdep_key() e) Remove unnecessary generic lockdep macro and functions f) Remove unnecessary lockdep code of each interfaces. After this patch, each interface modules don't need to maintain their lockdep keys. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/core/dev.c | 127 +++++++++++++++++---------------------------------- net/core/rtnetlink.c | 1 + 2 files changed, 44 insertions(+), 84 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index ab0edfc4a422..5722a81b6edd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -277,88 +277,6 @@ static RAW_NOTIFIER_HEAD(netdev_chain); DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); EXPORT_PER_CPU_SYMBOL(softnet_data); -#ifdef CONFIG_LOCKDEP -/* - * register_netdevice() inits txq->_xmit_lock and sets lockdep class - * according to dev->type - */ -static const unsigned short netdev_lock_type[] = { - ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, - ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, - ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, - ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, - ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, - ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, - ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, - ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, - ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, - ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, - ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, - ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, - ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, - ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, - ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; - -static const char *const netdev_lock_name[] = { - "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", - "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", - "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", - "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", - "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", - "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", - "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", - "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", - "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", - "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", - "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", - "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", - "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", - "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", - "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; - -static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; -static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; - -static inline unsigned short netdev_lock_pos(unsigned short dev_type) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) - if (netdev_lock_type[i] == dev_type) - return i; - /* the last key is used by default */ - return ARRAY_SIZE(netdev_lock_type) - 1; -} - -static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, - unsigned short dev_type) -{ - int i; - - i = netdev_lock_pos(dev_type); - lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], - netdev_lock_name[i]); -} - -static inline void netdev_set_addr_lockdep_class(struct net_device *dev) -{ - int i; - - i = netdev_lock_pos(dev->type); - lockdep_set_class_and_name(&dev->addr_list_lock, - &netdev_addr_lock_key[i], - netdev_lock_name[i]); -} -#else -static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, - unsigned short dev_type) -{ -} -static inline void netdev_set_addr_lockdep_class(struct net_device *dev) -{ -} -#endif - /******************************************************************************* * * Protocol management and registration routines @@ -8799,7 +8717,7 @@ static void netdev_init_one_queue(struct net_device *dev, { /* Initialize queue lock */ spin_lock_init(&queue->_xmit_lock); - netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); + lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key); queue->xmit_lock_owner = -1; netdev_queue_numa_node_write(queue, NUMA_NO_NODE); queue->dev = dev; @@ -8846,6 +8764,43 @@ void netif_tx_stop_all_queues(struct net_device *dev) } EXPORT_SYMBOL(netif_tx_stop_all_queues); +static void netdev_register_lockdep_key(struct net_device *dev) +{ + lockdep_register_key(&dev->qdisc_tx_busylock_key); + lockdep_register_key(&dev->qdisc_running_key); + lockdep_register_key(&dev->qdisc_xmit_lock_key); + lockdep_register_key(&dev->addr_list_lock_key); +} + +static void netdev_unregister_lockdep_key(struct net_device *dev) +{ + lockdep_unregister_key(&dev->qdisc_tx_busylock_key); + lockdep_unregister_key(&dev->qdisc_running_key); + lockdep_unregister_key(&dev->qdisc_xmit_lock_key); + lockdep_unregister_key(&dev->addr_list_lock_key); +} + +void netdev_update_lockdep_key(struct net_device *dev) +{ + struct netdev_queue *queue; + int i; + + lockdep_unregister_key(&dev->qdisc_xmit_lock_key); + lockdep_unregister_key(&dev->addr_list_lock_key); + + lockdep_register_key(&dev->qdisc_xmit_lock_key); + lockdep_register_key(&dev->addr_list_lock_key); + + lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); + for (i = 0; i < dev->num_tx_queues; i++) { + queue = netdev_get_tx_queue(dev, i); + + lockdep_set_class(&queue->_xmit_lock, + &dev->qdisc_xmit_lock_key); + } +} +EXPORT_SYMBOL(netdev_update_lockdep_key); + /** * register_netdevice - register a network device * @dev: device to register @@ -8880,7 +8835,7 @@ int register_netdevice(struct net_device *dev) BUG_ON(!net); spin_lock_init(&dev->addr_list_lock); - netdev_set_addr_lockdep_class(dev); + lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) @@ -9390,6 +9345,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); + netdev_register_lockdep_key(dev); + dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; dev->upper_level = 1; @@ -9474,6 +9431,8 @@ void free_netdev(struct net_device *dev) free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; + netdev_unregister_lockdep_key(dev); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { netdev_freemem(dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1ee6460f8275..13493aae4e6c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2355,6 +2355,7 @@ static int do_set_master(struct net_device *dev, int ifindex, err = ops->ndo_del_slave(upper_dev, dev); if (err) return err; + netdev_update_lockdep_key(dev); } else { return -EOPNOTSUPP; } -- cgit v1.2.3 From 32b6d34fedc2229cdf6a047fdbc0704085441915 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:56 +0000 Subject: net: core: add ignore flag to netdev_adjacent structure In order to link an adjacent node, netdev_upper_dev_link() is used and in order to unlink an adjacent node, netdev_upper_dev_unlink() is used. unlink operation does not fail, but link operation can fail. In order to exchange adjacent nodes, we should unlink an old adjacent node first. then, link a new adjacent node. If link operation is failed, we should link an old adjacent node again. But this link operation can fail too. It eventually breaks the adjacent link relationship. This patch adds an ignore flag into the netdev_adjacent structure. If this flag is set, netdev_upper_dev_link() ignores an old adjacent node for a moment. This patch also adds new functions for other modules. netdev_adjacent_change_prepare() netdev_adjacent_change_commit() netdev_adjacent_change_abort() netdev_adjacent_change_prepare() inserts new device into adjacent list but new device is not allowed to use immediately. If netdev_adjacent_change_prepare() fails, it internally rollbacks adjacent list so that we don't need any other action. netdev_adjacent_change_commit() deletes old device in the adjacent list and allows new device to use. netdev_adjacent_change_abort() rollbacks adjacent list. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/core/dev.c | 230 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 209 insertions(+), 21 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 5722a81b6edd..092c094038b6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6408,6 +6408,9 @@ struct netdev_adjacent { /* upper master flag, there can only be one master device per list */ bool master; + /* lookup ignore flag */ + bool ignore; + /* counter for the number of times this device was added to us */ u16 ref_nr; @@ -6430,7 +6433,7 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, return NULL; } -static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data) +static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data) { struct net_device *dev = data; @@ -6451,7 +6454,7 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, upper_dev); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -6469,7 +6472,7 @@ EXPORT_SYMBOL(netdev_has_upper_dev); bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev) { - return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, upper_dev); } EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); @@ -6513,6 +6516,22 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get); +static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev) +{ + struct netdev_adjacent *upper; + + ASSERT_RTNL(); + + if (list_empty(&dev->adj_list.upper)) + return NULL; + + upper = list_first_entry(&dev->adj_list.upper, + struct netdev_adjacent, list); + if (likely(upper->master) && !upper->ignore) + return upper->dev; + return NULL; +} + /** * netdev_has_any_lower_dev - Check if device is linked to some device * @dev: device @@ -6563,8 +6582,9 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); -static struct net_device *netdev_next_upper_dev(struct net_device *dev, - struct list_head **iter) +static struct net_device *__netdev_next_upper_dev(struct net_device *dev, + struct list_head **iter, + bool *ignore) { struct netdev_adjacent *upper; @@ -6574,6 +6594,7 @@ static struct net_device *netdev_next_upper_dev(struct net_device *dev, return NULL; *iter = &upper->list; + *ignore = upper->ignore; return upper->dev; } @@ -6595,14 +6616,15 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, return upper->dev; } -static int netdev_walk_all_upper_dev(struct net_device *dev, - int (*fn)(struct net_device *dev, - void *data), - void *data) +static int __netdev_walk_all_upper_dev(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) { struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; int ret, cur = 0; + bool ignore; now = dev; iter = &dev->adj_list.upper; @@ -6616,9 +6638,11 @@ static int netdev_walk_all_upper_dev(struct net_device *dev, next = NULL; while (1) { - udev = netdev_next_upper_dev(now, &iter); + udev = __netdev_next_upper_dev(now, &iter, &ignore); if (!udev) break; + if (ignore) + continue; next = udev; niter = &udev->adj_list.upper; @@ -6688,6 +6712,15 @@ int netdev_walk_all_upper_dev_rcu(struct net_device *dev, } EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); +static bool __netdev_has_upper_dev(struct net_device *dev, + struct net_device *upper_dev) +{ + ASSERT_RTNL(); + + return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev, + upper_dev); +} + /** * netdev_lower_get_next_private - Get the next ->private from the * lower neighbour list @@ -6784,6 +6817,23 @@ static struct net_device *netdev_next_lower_dev(struct net_device *dev, return lower->dev; } +static struct net_device *__netdev_next_lower_dev(struct net_device *dev, + struct list_head **iter, + bool *ignore) +{ + struct netdev_adjacent *lower; + + lower = list_entry((*iter)->next, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + *iter = &lower->list; + *ignore = lower->ignore; + + return lower->dev; +} + int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, void *data), @@ -6831,6 +6881,55 @@ int netdev_walk_all_lower_dev(struct net_device *dev, } EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); +static int __netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int ret, cur = 0; + bool ignore; + + now = dev; + iter = &dev->adj_list.lower; + + while (1) { + if (now != dev) { + ret = fn(now, data); + if (ret) + return ret; + } + + next = NULL; + while (1) { + ldev = __netdev_next_lower_dev(now, &iter, &ignore); + if (!ldev) + break; + if (ignore) + continue; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + break; + } + + if (!next) { + if (!cur) + return 0; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; + } + + return 0; +} + static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter) { @@ -6850,11 +6949,14 @@ static u8 __netdev_upper_depth(struct net_device *dev) struct net_device *udev; struct list_head *iter; u8 max_depth = 0; + bool ignore; for (iter = &dev->adj_list.upper, - udev = netdev_next_upper_dev(dev, &iter); + udev = __netdev_next_upper_dev(dev, &iter, &ignore); udev; - udev = netdev_next_upper_dev(dev, &iter)) { + udev = __netdev_next_upper_dev(dev, &iter, &ignore)) { + if (ignore) + continue; if (max_depth < udev->upper_level) max_depth = udev->upper_level; } @@ -6867,11 +6969,14 @@ static u8 __netdev_lower_depth(struct net_device *dev) struct net_device *ldev; struct list_head *iter; u8 max_depth = 0; + bool ignore; for (iter = &dev->adj_list.lower, - ldev = netdev_next_lower_dev(dev, &iter); + ldev = __netdev_next_lower_dev(dev, &iter, &ignore); ldev; - ldev = netdev_next_lower_dev(dev, &iter)) { + ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) { + if (ignore) + continue; if (max_depth < ldev->lower_level) max_depth = ldev->lower_level; } @@ -7035,6 +7140,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->master = master; adj->ref_nr = 1; adj->private = private; + adj->ignore = false; dev_hold(adj_dev); pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", @@ -7185,17 +7291,17 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ - if (netdev_has_upper_dev(upper_dev, dev)) + if (__netdev_has_upper_dev(upper_dev, dev)) return -EBUSY; if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV) return -EMLINK; if (!master) { - if (netdev_has_upper_dev(dev, upper_dev)) + if (__netdev_has_upper_dev(dev, upper_dev)) return -EEXIST; } else { - master_dev = netdev_master_upper_dev_get(dev); + master_dev = __netdev_master_upper_dev_get(dev); if (master_dev) return master_dev == upper_dev ? -EEXIST : -EBUSY; } @@ -7218,10 +7324,11 @@ static int __netdev_upper_dev_link(struct net_device *dev, goto rollback; __netdev_update_upper_level(dev, NULL); - netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); + __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); __netdev_update_lower_level(upper_dev, NULL); - netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); + __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, + NULL); return 0; @@ -7307,13 +7414,94 @@ void netdev_upper_dev_unlink(struct net_device *dev, &changeupper_info.info); __netdev_update_upper_level(dev, NULL); - netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); + __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); __netdev_update_lower_level(upper_dev, NULL); - netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); + __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, + NULL); } EXPORT_SYMBOL(netdev_upper_dev_unlink); +static void __netdev_adjacent_dev_set(struct net_device *upper_dev, + struct net_device *lower_dev, + bool val) +{ + struct netdev_adjacent *adj; + + adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower); + if (adj) + adj->ignore = val; + + adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper); + if (adj) + adj->ignore = val; +} + +static void netdev_adjacent_dev_disable(struct net_device *upper_dev, + struct net_device *lower_dev) +{ + __netdev_adjacent_dev_set(upper_dev, lower_dev, true); +} + +static void netdev_adjacent_dev_enable(struct net_device *upper_dev, + struct net_device *lower_dev) +{ + __netdev_adjacent_dev_set(upper_dev, lower_dev, false); +} + +int netdev_adjacent_change_prepare(struct net_device *old_dev, + struct net_device *new_dev, + struct net_device *dev, + struct netlink_ext_ack *extack) +{ + int err; + + if (!new_dev) + return 0; + + if (old_dev && new_dev != old_dev) + netdev_adjacent_dev_disable(dev, old_dev); + + err = netdev_upper_dev_link(new_dev, dev, extack); + if (err) { + if (old_dev && new_dev != old_dev) + netdev_adjacent_dev_enable(dev, old_dev); + return err; + } + + return 0; +} +EXPORT_SYMBOL(netdev_adjacent_change_prepare); + +void netdev_adjacent_change_commit(struct net_device *old_dev, + struct net_device *new_dev, + struct net_device *dev) +{ + if (!new_dev || !old_dev) + return; + + if (new_dev == old_dev) + return; + + netdev_adjacent_dev_enable(dev, old_dev); + netdev_upper_dev_unlink(old_dev, dev); +} +EXPORT_SYMBOL(netdev_adjacent_change_commit); + +void netdev_adjacent_change_abort(struct net_device *old_dev, + struct net_device *new_dev, + struct net_device *dev) +{ + if (!new_dev) + return; + + if (old_dev && new_dev != old_dev) + netdev_adjacent_dev_enable(dev, old_dev); + + netdev_upper_dev_unlink(new_dev, dev); +} +EXPORT_SYMBOL(netdev_adjacent_change_abort); + /** * netdev_bonding_info_change - Dispatch event about slave change * @dev: device -- cgit v1.2.3 From f3b0a18bb6cb07a9abb75e21b1f08eeaefa78e81 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:58 +0000 Subject: net: remove unnecessary variables and callback This patch removes variables and callback these are related to the nested device structure. devices that can be nested have their own nest_level variable that represents the depth of nested devices. In the previous patch, new {lower/upper}_level variables are added and they replace old private nest_level variable. So, this patch removes all 'nest_level' variables. In order to avoid lockdep warning, ->ndo_get_lock_subclass() was added to get lockdep subclass value, which is actually lower nested depth value. But now, they use the dynamic lockdep key to avoid lockdep warning instead of the subclass. So, this patch removes ->ndo_get_lock_subclass() callback. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/core/dev.c | 19 ------------------- net/core/dev_addr_lists.c | 12 ++++++------ 2 files changed, 6 insertions(+), 25 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 092c094038b6..1482e2ef2d25 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7615,25 +7615,6 @@ void *netdev_lower_dev_get_private(struct net_device *dev, EXPORT_SYMBOL(netdev_lower_dev_get_private); -int dev_get_nest_level(struct net_device *dev) -{ - struct net_device *lower = NULL; - struct list_head *iter; - int max_nest = -1; - int nest; - - ASSERT_RTNL(); - - netdev_for_each_lower_dev(dev, lower, iter) { - nest = dev_get_nest_level(lower); - if (max_nest < nest) - max_nest = nest; - } - - return max_nest + 1; -} -EXPORT_SYMBOL(dev_get_nest_level); - /** * netdev_lower_change - Dispatch event about lower device state change * @lower_dev: device diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 6393ba930097..2f949b5a1eb9 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -691,7 +691,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from) return; netif_addr_lock_bh(from); - netif_addr_lock_nested(to); + netif_addr_lock(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); @@ -858,7 +858,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -888,7 +888,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -912,7 +912,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) return; netif_addr_lock_bh(from); - netif_addr_lock_nested(to); + netif_addr_lock(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); -- cgit v1.2.3 From d4e4fdf9e4a27c87edb79b1478955075be141f67 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 23 Oct 2019 18:39:04 +0200 Subject: netns: fix GFP flags in rtnl_net_notifyid() In rtnl_net_notifyid(), we certainly can't pass a null GFP flag to rtnl_notify(). A GFP_KERNEL flag would be fine in most circumstances, but there are a few paths calling rtnl_net_notifyid() from atomic context or from RCU critical sections. The later also precludes the use of gfp_any() as it wouldn't detect the RCU case. Also, the nlmsg_new() call is wrong too, as it uses GFP_KERNEL unconditionally. Therefore, we need to pass the GFP flags as parameter and propagate it through function calls until the proper flags can be determined. In most cases, GFP_KERNEL is fine. The exceptions are: * openvswitch: ovs_vport_cmd_get() and ovs_vport_cmd_dump() indirectly call rtnl_net_notifyid() from RCU critical section, * rtnetlink: rtmsg_ifinfo_build_skb() already receives GFP flags as parameter. Also, in ovs_vport_cmd_build_info(), let's change the GFP flags used by nlmsg_new(). The function is allowed to sleep, so better make the flags consistent with the ones used in the following ovs_vport_cmd_fill_info() call. Found by code inspection. Fixes: 9a9634545c70 ("netns: notify netns id events") Signed-off-by: Guillaume Nault Acked-by: Nicolas Dichtel Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- net/core/net_namespace.c | 17 +++++++++-------- net/core/rtnetlink.c | 14 +++++++------- 3 files changed, 17 insertions(+), 16 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 1482e2ef2d25..96afd464284a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9770,7 +9770,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); - new_nsid = peernet2id_alloc(dev_net(dev), net); + new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL); /* If there is an ifindex conflict assign a new one */ if (__dev_get_by_index(net, dev->ifindex)) new_ifindex = dev_new_index(net); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 5a4ae0845bac..39402840025e 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -246,11 +246,11 @@ static int __peernet2id(struct net *net, struct net *peer) } static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, gfp_t gfp); /* This function returns the id of a peer netns. If no id is assigned, one will * be allocated and returned. */ -int peernet2id_alloc(struct net *net, struct net *peer) +int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) { bool alloc = false, alive = false; int id; @@ -269,7 +269,7 @@ int peernet2id_alloc(struct net *net, struct net *peer) id = __peernet2id_alloc(net, peer, &alloc); spin_unlock_bh(&net->nsid_lock); if (alloc && id >= 0) - rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL); + rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp); if (alive) put_net(peer); return id; @@ -534,7 +534,8 @@ static void unhash_nsid(struct net *net, struct net *last) idr_remove(&tmp->netns_ids, id); spin_unlock_bh(&tmp->nsid_lock); if (id >= 0) - rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL); + rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL, + GFP_KERNEL); if (tmp == last) break; } @@ -767,7 +768,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, spin_unlock_bh(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid, - nlh); + nlh, GFP_KERNEL); err = 0; } else if (err == -ENOSPC && nsid >= 0) { err = -EEXIST; @@ -1055,7 +1056,7 @@ end: } static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, gfp_t gfp) { struct net_fill_args fillargs = { .portid = portid, @@ -1066,7 +1067,7 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, struct sk_buff *msg; int err = -ENOMEM; - msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); + msg = nlmsg_new(rtnl_net_get_size(), gfp); if (!msg) goto out; @@ -1074,7 +1075,7 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, if (err < 0) goto err_out; - rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, 0); + rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp); return; err_out: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 13493aae4e6c..ba4b4048ec3e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1523,7 +1523,7 @@ static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, static int rtnl_fill_link_netnsid(struct sk_buff *skb, const struct net_device *dev, - struct net *src_net) + struct net *src_net, gfp_t gfp) { bool put_iflink = false; @@ -1531,7 +1531,7 @@ static int rtnl_fill_link_netnsid(struct sk_buff *skb, struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); if (!net_eq(dev_net(dev), link_net)) { - int id = peernet2id_alloc(src_net, link_net); + int id = peernet2id_alloc(src_net, link_net, gfp); if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) return -EMSGSIZE; @@ -1589,7 +1589,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, u32 event, int *new_nsid, int new_ifindex, - int tgt_netnsid) + int tgt_netnsid, gfp_t gfp) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -1681,7 +1681,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; } - if (rtnl_fill_link_netnsid(skb, dev, src_net)) + if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp)) goto nla_put_failure; if (new_nsid && @@ -2001,7 +2001,7 @@ walk_entries: NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, 0, flags, ext_filter_mask, 0, NULL, 0, - netnsid); + netnsid, GFP_KERNEL); if (err < 0) { if (likely(skb->len)) @@ -3360,7 +3360,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, err = rtnl_fill_ifinfo(nskb, dev, net, RTM_NEWLINK, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, ext_filter_mask, - 0, NULL, 0, netnsid); + 0, NULL, 0, netnsid, GFP_KERNEL); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -3472,7 +3472,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), type, 0, 0, change, 0, 0, event, - new_nsid, new_ifindex, -1); + new_nsid, new_ifindex, -1, flags); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); -- cgit v1.2.3 From 5ff223e86f5addbfae26419cbb5d61d98f6fbf7d Mon Sep 17 00:00:00 2001 From: zhanglin Date: Sat, 26 Oct 2019 15:54:16 +0800 Subject: net: Zeroing the structure ethtool_wolinfo in ethtool_get_wol() memset() the structure ethtool_wolinfo that has padded bytes but the padded bytes have not been zeroed out. Signed-off-by: zhanglin Signed-off-by: David S. Miller --- net/core/ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c763106c73fc..cd9bc67381b2 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1396,11 +1396,13 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) { - struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; + struct ethtool_wolinfo wol; if (!dev->ethtool_ops->get_wol) return -EOPNOTSUPP; + memset(&wol, 0, sizeof(struct ethtool_wolinfo)); + wol.cmd = ETHTOOL_GWOL; dev->ethtool_ops->get_wol(dev, &wol); if (copy_to_user(useraddr, &wol, sizeof(wol))) -- cgit v1.2.3 From 3ef7cf57c72f32f61e97f8fa401bc39ea1f1a5d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:50 -0700 Subject: net: use skb_queue_empty_lockless() in poll() handlers Many poll() handlers are lockless. Using skb_queue_empty_lockless() instead of skb_queue_empty() is more appropriate. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/datagram.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/datagram.c b/net/core/datagram.c index c210fc116103..5b685e110aff 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -767,7 +767,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); @@ -777,7 +777,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, mask |= EPOLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ -- cgit v1.2.3 From 3f926af3f4d688e2e11e7f8ed04e277a14d4d4a4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:51 -0700 Subject: net: use skb_queue_empty_lockless() in busy poll contexts Busy polling usually runs without locks. Let's use skb_queue_empty_lockless() instead of skb_queue_empty() Also uses READ_ONCE() in __skb_try_recv_datagram() to address a similar potential problem. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- net/core/sock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/datagram.c b/net/core/datagram.c index 5b685e110aff..03515e46a49a 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -278,7 +278,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (sk->sk_receive_queue.prev != *last); + } while (READ_ONCE(sk->sk_receive_queue.prev) != *last); error = -EAGAIN; diff --git a/net/core/sock.c b/net/core/sock.c index a515392ba84b..b8e758bcb6ad 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3600,7 +3600,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time) { struct sock *sk = p; - return !skb_queue_empty(&sk->sk_receive_queue) || + return !skb_queue_empty_lockless(&sk->sk_receive_queue) || sk_busy_loop_timeout(sk, start_time); } EXPORT_SYMBOL(sk_busy_loop_end); -- cgit v1.2.3 From 7c422d0ce97552dde4a97e6290de70ec6efb0fc6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Oct 2019 22:44:52 -0700 Subject: net: add READ_ONCE() annotation in __skb_wait_for_more_packets() __skb_wait_for_more_packets() can be called while other cpus can feed packets to the socket receive queue. KCSAN reported : BUG: KCSAN: data-race in __skb_wait_for_more_packets / __udp_enqueue_schedule_skb write to 0xffff888102e40b58 of 8 bytes by interrupt on cpu 0: __skb_insert include/linux/skbuff.h:1852 [inline] __skb_queue_before include/linux/skbuff.h:1958 [inline] __skb_queue_tail include/linux/skbuff.h:1991 [inline] __udp_enqueue_schedule_skb+0x2d7/0x410 net/ipv4/udp.c:1470 __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline] udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057 udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074 udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233 __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300 udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470 ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 read to 0xffff888102e40b58 of 8 bytes by task 13035 on cpu 1: __skb_wait_for_more_packets+0xfa/0x320 net/core/datagram.c:100 __skb_recv_udp+0x374/0x500 net/ipv4/udp.c:1683 udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712 inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838 sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871 ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480 do_recvmmsg+0x19a/0x5c0 net/socket.c:2601 __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680 __do_sys_recvmmsg net/socket.c:2703 [inline] __se_sys_recvmmsg net/socket.c:2696 [inline] __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 13035 Comm: syz-executor.3 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/datagram.c b/net/core/datagram.c index 03515e46a49a..da3c24ed129c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -97,7 +97,7 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, if (error) goto out_err; - if (sk->sk_receive_queue.prev != skb) + if (READ_ONCE(sk->sk_receive_queue.prev) != skb) goto out; /* Socket shut down? */ -- cgit v1.2.3 From 8b73018fe44521c1cf59d7bac53624c87d3f10e2 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 29 Oct 2019 13:59:32 +0200 Subject: net: rtnetlink: fix a typo fbd -> fdb A simple typo fix in the nl error message (fbd -> fdb). CC: David Ahern Fixes: 8c6e137fbc7f ("rtnetlink: Update rtnl_fdb_dump for strict data checking") Signed-off-by: Nikolay Aleksandrov Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ba4b4048ec3e..c81cd80114d9 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3917,7 +3917,7 @@ static int valid_fdb_dump_strict(const struct nlmsghdr *nlh, ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) { - NL_SET_ERR_MSG(extack, "Invalid values in header for fbd dump request"); + NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request"); return -EINVAL; } -- cgit v1.2.3 From 7170a977743b72cf3eb46ef6ef89885dc7ad3621 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Oct 2019 13:00:04 -0700 Subject: net: annotate accesses to sk->sk_incoming_cpu This socket field can be read and written by concurrent cpus. Use READ_ONCE() and WRITE_ONCE() annotations to document this, and avoid some compiler 'optimizations'. KCSAN reported : BUG: KCSAN: data-race in tcp_v4_rcv / tcp_v4_rcv write to 0xffff88812220763c of 4 bytes by interrupt on cpu 0: sk_incoming_cpu_update include/net/sock.h:953 [inline] tcp_v4_rcv+0x1b3c/0x1bb0 net/ipv4/tcp_ipv4.c:1934 ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 napi_poll net/core/dev.c:6392 [inline] net_rx_action+0x3ae/0xa90 net/core/dev.c:6460 __do_softirq+0x115/0x33f kernel/softirq.c:292 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1082 do_softirq.part.0+0x6b/0x80 kernel/softirq.c:337 do_softirq kernel/softirq.c:329 [inline] __local_bh_enable_ip+0x76/0x80 kernel/softirq.c:189 read to 0xffff88812220763c of 4 bytes by interrupt on cpu 1: sk_incoming_cpu_update include/net/sock.h:952 [inline] tcp_v4_rcv+0x181a/0x1bb0 net/ipv4/tcp_ipv4.c:1934 ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:305 [inline] NF_HOOK include/linux/netfilter.h:299 [inline] ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124 process_backlog+0x1d3/0x420 net/core/dev.c:5955 napi_poll net/core/dev.c:6392 [inline] net_rx_action+0x3ae/0xa90 net/core/dev.c:6460 __do_softirq+0x115/0x33f kernel/softirq.c:292 run_ksoftirqd+0x46/0x60 kernel/softirq.c:603 smpboot_thread_fn+0x37d/0x4a0 kernel/smpboot.c:165 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Goo