diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/datagram.c | 301 | ||||
-rw-r--r-- | net/core/dev.c | 182 | ||||
-rw-r--r-- | net/core/dev_addr_lists.c | 4 | ||||
-rw-r--r-- | net/core/dev_ioctl.c | 9 | ||||
-rw-r--r-- | net/core/dst.c | 24 | ||||
-rw-r--r-- | net/core/ethtool.c | 81 | ||||
-rw-r--r-- | net/core/filter.c | 97 | ||||
-rw-r--r-- | net/core/iovec.c | 47 | ||||
-rw-r--r-- | net/core/link_watch.c | 2 | ||||
-rw-r--r-- | net/core/neighbour.c | 279 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 44 | ||||
-rw-r--r-- | net/core/netpoll.c | 4 | ||||
-rw-r--r-- | net/core/pktgen.c | 3 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 188 | ||||
-rw-r--r-- | net/core/scm.c | 3 | ||||
-rw-r--r-- | net/core/skbuff.c | 356 | ||||
-rw-r--r-- | net/core/sock.c | 20 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 21 | ||||
-rw-r--r-- | net/core/utils.c | 3 |
19 files changed, 1013 insertions, 655 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c index fdbc9a81d4c2..df493d68330c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -49,6 +49,7 @@ #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/pagemap.h> +#include <linux/uio.h> #include <net/protocol.h> #include <linux/skbuff.h> @@ -309,16 +310,14 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) EXPORT_SYMBOL(skb_kill_datagram); /** - * skb_copy_datagram_iovec - Copy a datagram to an iovec. + * skb_copy_datagram_iter - Copy a datagram to an iovec iterator. * @skb: buffer to copy * @offset: offset in the buffer to start copying from - * @to: io vector to copy to + * @to: iovec iterator to copy to * @len: amount of data to copy from buffer to iovec - * - * Note: the iovec is modified during the copy. */ -int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, - struct iovec *to, int len) +int skb_copy_datagram_iter(const struct sk_buff *skb, int offset, + struct iov_iter *to, int len) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -330,8 +329,8 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, if (copy > 0) { if (copy > len) copy = len; - if (memcpy_toiovec(to, skb->data + offset, copy)) - goto fault; + if (copy_to_iter(skb->data + offset, copy, to) != copy) + goto short_copy; if ((len -= copy) == 0) return 0; offset += copy; @@ -346,18 +345,12 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { - int err; - u8 *vaddr; - struct page *page = skb_frag_page(frag); - if (copy > len) copy = len; - vaddr = kmap(page); - err = memcpy_toiovec(to, vaddr + frag->page_offset + - offset - start, copy); - kunmap(page); - if (err) - goto fault; + if (copy_page_to_iter(skb_frag_page(frag), + frag->page_offset + offset - + start, copy, to) != copy) + goto short_copy; if (!(len -= copy)) return 0; offset += copy; @@ -374,9 +367,8 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, if ((copy = end - offset) > 0) { if (copy > len) copy = len; - if (skb_copy_datagram_iovec(frag_iter, - offset - start, - to, copy)) + if (skb_copy_datagram_iter(frag_iter, offset - start, + to, copy)) goto fault; if ((len -= copy) == 0) return 0; @@ -387,113 +379,33 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, if (!len) return 0; + /* This is not really a user copy fault, but rather someone + * gave us a bogus length on the skb. We should probably + * print a warning here as it may indicate a kernel bug. + */ + fault: return -EFAULT; -} -EXPORT_SYMBOL(skb_copy_datagram_iovec); -/** - * skb_copy_datagram_const_iovec - Copy a datagram to an iovec. - * @skb: buffer to copy - * @offset: offset in the buffer to start copying from - * @to: io vector to copy to - * @to_offset: offset in the io vector to start copying to - * @len: amount of data to copy from buffer to iovec - * - * Returns 0 or -EFAULT. - * Note: the iovec is not modified during the copy. - */ -int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, - const struct iovec *to, int to_offset, - int len) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - struct sk_buff *frag_iter; +short_copy: + if (iov_iter_count(to)) + goto fault; - /* Copy header. */ - if (copy > 0) { - if (copy > len) - copy = len; - if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - to_offset += copy; - } - - /* Copy paged appendix. Hmm... why does this look so complicated? */ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - WARN_ON(start > offset + len); - - end = start + skb_frag_size(frag); - if ((copy = end - offset) > 0) { - int err; - u8 *vaddr; - struct page *page = skb_frag_page(frag); - - if (copy > len) - copy = len; - vaddr = kmap(page); - err = memcpy_toiovecend(to, vaddr + frag->page_offset + - offset - start, to_offset, copy); - kunmap(page); - if (err) - goto fault; - if (!(len -= copy)) - return 0; - offset += copy; - to_offset += copy; - } - start = end; - } - - skb_walk_frags(skb, frag_iter) { - int end; - - WARN_ON(start > offset + len); - - end = start + frag_iter->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_datagram_const_iovec(frag_iter, - offset - start, - to, to_offset, - copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - to_offset += copy; - } - start = end; - } - if (!len) - return 0; - -fault: - return -EFAULT; + return 0; } -EXPORT_SYMBOL(skb_copy_datagram_const_iovec); +EXPORT_SYMBOL(skb_copy_datagram_iter); /** - * skb_copy_datagram_from_iovec - Copy a datagram from an iovec. + * skb_copy_datagram_from_iter - Copy a datagram from an iov_iter. * @skb: buffer to copy * @offset: offset in the buffer to start copying to - * @from: io vector to copy to - * @from_offset: offset in the io vector to start copying from + * @from: the copy source * @len: amount of data to copy to buffer from iovec * * Returns 0 or -EFAULT. - * Note: the iovec is not modified during the copy. */ -int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, - const struct iovec *from, int from_offset, +int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, + struct iov_iter *from, int len) { int start = skb_headlen(skb); @@ -504,13 +416,11 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, if (copy > 0) { if (copy > len) copy = len; - if (memcpy_fromiovecend(skb->data + offset, from, from_offset, - copy)) + if (copy_from_iter(skb->data + offset, copy, from) != copy) goto fault; if ((len -= copy) == 0) return 0; offset += copy; - from_offset += copy; } /* Copy paged appendix. Hmm... why does this look so complicated? */ @@ -522,24 +432,19 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { - int err; - u8 *vaddr; - struct page *page = skb_frag_page(frag); + size_t copied; if (copy > len) copy = len; - vaddr = kmap(page); - err = memcpy_fromiovecend(vaddr + frag->page_offset + - offset - start, - from, from_offset, copy); - kunmap(page); - if (err) + copied = copy_page_from_iter(skb_frag_page(frag), + frag->page_offset + offset - start, + copy, from); + if (copied != copy) goto fault; if (!(len -= copy)) return 0; offset += copy; - from_offset += copy; } start = end; } @@ -553,16 +458,13 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, if ((copy = end - offset) > 0) { if (copy > len) copy = len; - if (skb_copy_datagram_from_iovec(frag_iter, - offset - start, - from, - from_offset, - copy)) + if (skb_copy_datagram_from_iter(frag_iter, + offset - start, + from, copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; - from_offset += copy; } start = end; } @@ -572,101 +474,82 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, fault: return -EFAULT; } -EXPORT_SYMBOL(skb_copy_datagram_from_iovec); +EXPORT_SYMBOL(skb_copy_datagram_from_iter); /** - * zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec + * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter * @skb: buffer to copy - * @from: io vector to copy from - * @offset: offset in the io vector to start copying from - * @count: amount of vectors to copy to buffer from + * @from: the source to copy from * * The function will first copy up to headlen, and then pin the userspace * pages and build frags through them. * * Returns 0, -EFAULT or -EMSGSIZE. - * Note: the iovec is not modified during the copy */ -int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, - int offset, size_t count) +int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) { - int len = iov_length(from, count) - offset; + int len = iov_iter_count(from); int copy = min_t(int, skb_headlen(skb), len); - int size; - int i = 0; + int frag = 0; /* copy up to skb headlen */ - if (skb_copy_datagram_from_iovec(skb, 0, from, offset, copy)) + if (skb_copy_datagram_from_iter(skb, 0, from, copy)) return -EFAULT; - if (len == copy) - return 0; - - offset += copy; - while (count--) { - struct page *page[MAX_SKB_FRAGS]; - int num_pages; - unsigned long base; + while (iov_iter_count(from)) { + struct page *pages[MAX_SKB_FRAGS]; + size_t start; + ssize_t copied; unsigned long truesize; + int n = 0; - /* Skip over from offset and copied */ - if (offset >= from->iov_len) { - offset -= from->iov_len; - ++from; - continue; - } - len = from->iov_len - offset; - base = (unsigned long)from->iov_base + offset; - size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; - if (i + size > MAX_SKB_FRAGS) + if (frag == MAX_SKB_FRAGS) return -EMSGSIZE; - num_pages = get_user_pages_fast(base, size, 0, &page[i]); - if (num_pages != size) { - release_pages(&page[i], num_pages, 0); + + copied = iov_iter_get_pages(from, pages, ~0U, + MAX_SKB_FRAGS - frag, &start); + if (copied < 0) return -EFAULT; - } - truesize = size * PAGE_SIZE; - skb->data_len += len; - skb->len += len; + + iov_iter_advance(from, copied); + + truesize = PAGE_ALIGN(copied + start); + skb->data_len += copied; + skb->len += copied; skb->truesize += truesize; atomic_add(truesize, &skb->sk->sk_wmem_alloc); - while (len) { - int off = base & ~PAGE_MASK; - int size = min_t(int, len, PAGE_SIZE - off); - skb_fill_page_desc(skb, i, page[i], off, size); - base += size; - len -= size; - i++; + while (copied) { + int size = min_t(int, copied, PAGE_SIZE - start); + skb_fill_page_desc(skb, frag++, pages[n], start, size); + start = 0; + copied -= size; + n++; } - offset = 0; - ++from; } return 0; } -EXPORT_SYMBOL(zerocopy_sg_from_iovec); +EXPORT_SYMBOL(zerocopy_sg_from_iter); static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, - u8 __user *to, int len, + struct iov_iter *to, int len, __wsum *csump) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int pos = 0; + int n; /* Copy header. */ if (copy > 0) { - int err = 0; if (copy > len) copy = len; - *csump = csum_and_copy_to_user(skb->data + offset, to, copy, - *csump, &err); - if (err) + n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to); + if (n != copy) goto fault; if ((len -= copy) == 0) return 0; offset += copy; - to += copy; pos = copy; } @@ -678,26 +561,22 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { - __wsum csum2; - int err = 0; - u8 *vaddr; + __wsum csum2 = 0; struct page *page = skb_frag_page(frag); + u8 *vaddr = kmap(page); if (copy > len) copy = len; - vaddr = kmap(page); - csum2 = csum_and_copy_to_user(vaddr + - frag->page_offset + - offset - start, - to, copy, 0, &err); + n = csum_and_copy_to_iter(vaddr + frag->page_offset + + offset - start, copy, + &csum2, to); kunmap(page); - if (err) + if (n != copy) goto fault; *csump = csum_block_add(*csump, csum2, pos); if (!(len -= copy)) return 0; offset += copy; - to += copy; pos += copy; } start = end; @@ -722,7 +601,6 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, if ((len -= copy) == 0) return 0; offset += copy; - to += copy; pos += copy; } start = end; @@ -775,20 +653,19 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb) EXPORT_SYMBOL(__skb_checksum_complete); /** - * skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec. + * skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec. * @skb: skbuff * @hlen: hardware length - * @iov: io vector + * @msg: destination * * Caller _must_ check that skb will fit to this iovec. * * Returns: 0 - success. * -EINVAL - checksum failure. - * -EFAULT - fault during copy. Beware, in this case iovec - * can be modified! + * -EFAULT - fault during copy. */ -int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, - int hlen, struct iovec *iov) +int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, + int hlen, struct msghdr *msg) { __wsum csum; int chunk = skb->len - hlen; @@ -796,28 +673,20 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, if (!chunk) return 0; - /* Skip filled elements. - * Pretty silly, look at memcpy_toiovec, though 8) - */ - while (!iov->iov_len) - iov++; - - if (iov->iov_len < chunk) { + if (iov_iter_count(&msg->msg_iter) < chunk) { if (__skb_checksum_complete(skb)) goto csum_error; - if (skb_copy_datagram_iovec(skb, hlen, iov, chunk)) + if (skb_copy_datagram_msg(skb, hlen, msg, chunk)) goto fault; } else { csum = csum_partial(skb->data, hlen, skb->csum); - if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base, + if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter, chunk, &csum)) goto fault; if (csum_fold(csum)) goto csum_error; if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); - iov->iov_len -= chunk; - iov->iov_base += chunk; } return 0; csum_error: @@ -825,7 +694,7 @@ csum_error: fault: return -EFAULT; } -EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec); +EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg); /** * datagram_poll - generic datagram poll diff --git a/net/core/dev.c b/net/core/dev.c index 945bbd001359..f411c28d0a66 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -118,6 +118,7 @@ #include <linux/if_vlan.h> #include <linux/ip.h> #include <net/ip.h> +#include <net/mpls.h> #include <linux/ipv6.h> #include <linux/in.h> #include <linux/jhash.h> @@ -133,6 +134,7 @@ #include <linux/vmalloc.h> #include <linux/if_macvlan.h> #include <linux/errqueue.h> +#include <linux/hrtimer.h> #include "net-sysfs.h" @@ -1435,22 +1437,17 @@ EXPORT_SYMBOL(dev_close); */ void dev_disable_lro(struct net_device *dev) { - /* - * If we're trying to disable lro on a vlan device - * use the underlying physical device instead - */ - if (is_vlan_dev(dev)) - dev = vlan_dev_real_dev(dev); - - /* the same for macvlan devices */ - if (netif_is_macvlan(dev)) - dev = macvlan_dev_real_dev(dev); + struct net_device *lower_dev; + struct list_head *iter; dev->wanted_features &= ~NETIF_F_LRO; netdev_update_features(dev); if (unlikely(dev->features & NETIF_F_LRO)) netdev_WARN(dev, "failed to disable LRO!\n"); + + netdev_for_each_lower_dev(dev, lower_dev, iter) + dev_disable_lro(lower_dev); } EXPORT_SYMBOL(dev_disable_lro); @@ -2530,7 +2527,7 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb, netdev_features_t features, __be16 type) { - if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC)) + if (eth_p_mpls(type)) features &= skb->dev->mpls_features; return features; @@ -2647,12 +2644,8 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) { if (vlan_tx_tag_present(skb) && - !vlan_hw_offload_capable(features, skb->vlan_proto)) { - skb = __vlan_put_tag(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); - if (skb) - skb->vlan_tci = 0; - } + !vlan_hw_offload_capable(features, skb->vlan_proto)) + skb = __vlan_hwaccel_push_inside(skb); return skb; } @@ -3304,7 +3297,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, rps_lock(sd); qlen = skb_queue_len(&sd->input_pkt_queue); if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { - if (skb_queue_len(&sd->input_pkt_queue)) { + if (qlen) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); input_queue_tail_incr_save(sd, qtail); @@ -4179,7 +4172,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) struct sk_buff *skb = napi->skb; if (!skb) { - skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); + skb = napi_alloc_skb(napi, GRO_MAX_HEAD); napi->skb = skb; } return skb; @@ -4316,20 +4309,28 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) local_irq_enable(); } +static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) +{ +#ifdef CONFIG_RPS + return sd->rps_ipi_list != NULL; +#else + return false; +#endif +} + static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); -#ifdef CONFIG_RPS /* Check if we have pending ipi, its better to send them now, * not waiting net_rx_action() end. */ - if (sd->rps_ipi_list) { + if (sd_has_rps_ipi_waiting(sd)) { local_irq_disable(); net_rps_action_and_irq_enable(sd); } -#endif + napi->weight = weight_p; local_irq_disable(); while (1) { @@ -4356,7 +4357,6 @@ static int process_backlog(struct napi_struct *napi, int quota) * We can use a plain write instead of clear_bit(), * and we dont need an smp_mb() memory barrier. */ - list_del(&napi->poll_list); napi->state = 0; rps_unlock(sd); @@ -4376,7 +4376,8 @@ static int process_backlog(struct napi_struct *napi, int quota) * __napi_schedule - schedule for receive * @n: entry to schedule * - * The entry's receive function will be scheduled to run + * The entry's receive function will be scheduled to run. + * Consider using __napi_schedule_irqoff() if hard irqs are masked. */ void __napi_schedule(struct napi_struct *n) { @@ -4388,18 +4389,29 @@ void __napi_schedule(struct napi_struct *n) } EXPORT_SYMBOL(__napi_schedule); +/** + * __napi_schedule_irqoff - schedule for receive + * @n: entry to schedule + * + * Variant of __napi_schedule() assuming hard irqs are masked + */ +void __napi_schedule_irqoff(struct napi_struct *n) +{ + ____napi_schedule(this_cpu_ptr(&softnet_data), n); +} +EXPORT_SYMBOL(__napi_schedule_irqoff); + void __napi_complete(struct napi_struct *n) { BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - BUG_ON(n->gro_list); - list_del(&n->poll_list); + list_del_init(&n->poll_list); smp_mb__before_atomic(); clear_bit(NAPI_STATE_SCHED, &n->state); } EXPORT_SYMBOL(__napi_complete); -void napi_complete(struct napi_struct *n) +void napi_complete_done(struct napi_struct *n, int work_done) { unsigned long flags; @@ -4410,12 +4422,28 @@ void napi_complete(struct napi_struct *n) if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) return; - napi_gro_flush(n, false); - local_irq_save(flags); - __napi_complete(n); - local_irq_restore(flags); + if (n->gro_list) { + unsigned long timeout = 0; + + if (work_done) + timeout = n->dev->gro_flush_timeout; + + if (timeout) + hrtimer_start(&n->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); + else + napi_gro_flush(n, false); + } + if (likely(list_empty(&n->poll_list))) { + WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state)); + } else { + /* If n->poll_list is not empty, we need to mask irqs */ + local_irq_save(flags); + __napi_complete(n); + local_irq_restore(flags); + } } -EXPORT_SYMBOL(napi_complete); +EXPORT_SYMBOL(napi_complete_done); /* must be called under rcu_read_lock(), as we dont take a reference */ struct napi_struct *napi_by_id(unsigned int napi_id) @@ -4469,10 +4497,23 @@ void napi_hash_del(struct napi_struct *napi) } EXPORT_SYMBOL_GPL(napi_hash_del); +static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) +{ + struct napi_struct *napi; + + napi = container_of(timer, struct napi_struct, timer); + if (napi->gro_list) + napi_schedule(napi); + + return HRTIMER_NORESTART; +} + void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { INIT_LIST_HEAD(&napi->poll_list); + hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + napi->timer.function = napi_watchdog; napi->gro_count = 0; napi->gro_list = NULL; napi->skb = NULL; @@ -4491,6 +4532,20 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, } EXPORT_SYMBOL(netif_napi_add); +void napi_disable(struct napi_struct *n) +{ + might_sleep(); + set_bit(NAPI_STATE_DISABLE, &n->state); + + while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) + msleep(1); + + hrtimer_cancel(&n->timer); + + clear_bit(NAPI_STATE_DISABLE, &n->state); +} +EXPORT_SYMBOL(napi_disable); + void netif_napi_del(struct napi_struct *napi) { list_del_init(&napi->dev_list); @@ -4507,29 +4562,28 @@ static void net_rx_action(struct softirq_action *h) struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + 2; int budget = netdev_budget; + LIST_HEAD(list); + LIST_HEAD(repoll); void *have; local_irq_disable(); + list_splice_init(&sd->poll_list, &list); + local_irq_enable(); - while (!list_empty(&sd->poll_list)) { + while (!list_empty(&list)) { struct napi_struct *n; int work, weight; - /* If softirq window is exhuasted then punt. + /* If softirq window is exhausted then punt. * Allow this to run for 2 jiffies since which will allow * an average latency of 1.5/HZ. */ if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) goto softnet_break; - local_irq_enable(); - /* Even though interrupts have been re-enabled, this - * access is safe because interrupts can only add new - * entries to the tail of this list, and only ->poll() - * calls can remove this head entry from the list. - */ - n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); + n = list_first_entry(&list, struct napi_struct, poll_list); + list_del_init(&n->poll_list); have = netpoll_poll_lock(n); @@ -4551,8 +4605,6 @@ static void net_rx_action(struct softirq_action *h) budget -= work; - local_irq_disable(); - /* Drivers must not modify the NAPI state if they * consume the entire weight. In such cases this code * still "owns" the NAPI instance and therefore can @@ -4560,32 +4612,40 @@ static void net_rx_action(struct softirq_action *h) */ if (unlikely(work == weight)) { if (unlikely(napi_disable_pending(n))) { - local_irq_enable(); napi_complete(n); - local_irq_disable(); } else { if (n->gro_list) { /* flush too old packets * If HZ < 1000, flush all packets. */ - local_irq_enable(); napi_gro_flush(n, HZ >= 1000); - local_irq_disable(); } - list_move_tail(&n->poll_list, &sd->poll_list); + list_add_tail(&n->poll_list, &repoll); } } netpoll_poll_unlock(have); } + + if (!sd_has_rps_ipi_waiting(sd) && + list_empty(&list) && + list_empty(&repoll)) + return; out: + local_irq_disable(); + + list_splice_tail_init(&sd->poll_list, &list); + list_splice_tail(&repoll, &list); + list_splice(&list, &sd->poll_list); + if (!list_empty(&sd->poll_list)) + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + net_rps_action_and_irq_enable(sd); return; softnet_break: sd->time_squeeze++; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); goto out; } @@ -5786,7 +5846,7 @@ EXPORT_SYMBOL(dev_change_carrier); * Get device physical port ID */ int dev_get_phys_port_id(struct net_device *dev, - struct netdev_phys_port_id *ppid) + struct netdev_phys_item_id *ppid) { const struct net_device_ops *ops = dev->netdev_ops; @@ -5865,6 +5925,8 @@ static void rollback_registered_many(struct list_head *head) synchronize_net(); list_for_each_entry(dev, head, unreg_list) { + struct sk_buff *skb = NULL; + /* Shutdown queueing discipline. */ dev_shutdown(dev); @@ -5874,6 +5936,11 @@ static void rollback_registered_many(struct list_head *head) */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + if (!dev->rtnl_link_ops || + dev->rtnl_link_state == RTNL_LINK_INITIALIZED) + skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, + GFP_KERNEL); + /* * Flush the unicast and multicast chains */ @@ -5883,9 +5950,8 @@ static void rollback_registered_many(struct list_head *head) if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); - if (!dev->rtnl_link_ops || - dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); + if (skb) + rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); /* Notifier chain MUST detach us all upper devices. */ WARN_ON(netdev_has_any_upper_dev(dev)); @@ -7200,11 +7266,10 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) */ struct net *net; bool unregistering; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); + add_wait_queue(&netdev_unregistering_wq, &wait); for (;;) { - prepare_to_wait(&netdev_unregistering_wq, &wait, - TASK_UNINTERRUPTIBLE); unregistering = false; rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { @@ -7216,9 +7281,10 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) if (!unregistering) break; __rtnl_unlock(); - schedule(); + + wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } - finish_wait(&netdev_unregistering_wq, &wait); + remove_wait_queue(&netdev_unregistering_wq, &wait); } static void __net_exit default_device_exit_batch(struct list_head *net_list) diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index b6b230600b97..c0548d268e1a 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -278,8 +278,8 @@ int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, EXPORT_SYMBOL(__hw_addr_sync_dev); /** - * __hw_addr_unsync_dev - Remove synchonized addresses from device - * @list: address list to remove syncronized addresses from + * __hw_addr_unsync_dev - Remove synchronized addresses from device + * @list: address list to remove synchronized addresses from * @dev: device to sync * @unsync: function to call if address should be removed * diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 72e899a3efda..b94b1d293506 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -142,10 +142,12 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm case SIOCGIFHWADDR: if (!dev->addr_len) - memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); + memset(ifr->ifr_hwaddr.sa_data, 0, + sizeof(ifr->ifr_hwaddr.sa_data)); else memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + min(sizeof(ifr->ifr_hwaddr.sa_data), + (size_t)dev->addr_len)); ifr->ifr_hwaddr.sa_family = dev->type; return 0; @@ -265,7 +267,8 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + min(sizeof(ifr->ifr_hwaddr.sa_data), + (size_t)dev->addr_len)); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return 0; diff --git a/net/core/dst.c b/net/core/dst.c index a028409ee438..e956ce6d1378 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -327,30 +327,6 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) } EXPORT_SYMBOL(__dst_destroy_metrics_generic); -/** - * __skb_dst_set_noref - sets skb dst, without a reference - * @skb: buffer - * @dst: dst entry - * @force: if force is set, use noref version even for DST_NOCACHE entries - * - * Sets skb dst, assuming a reference was not taken on dst - * skb_dst_drop() should not dst_release() this dst - */ |