summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c301
-rw-r--r--net/core/dev.c182
-rw-r--r--net/core/dev_addr_lists.c4
-rw-r--r--net/core/dev_ioctl.c9
-rw-r--r--net/core/dst.c24
-rw-r--r--net/core/ethtool.c81
-rw-r--r--net/core/filter.c97
-rw-r--r--net/core/iovec.c47
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/neighbour.c279
-rw-r--r--net/core/net-sysfs.c44
-rw-r--r--net/core/netpoll.c4
-rw-r--r--net/core/pktgen.c3
-rw-r--r--net/core/rtnetlink.c188
-rw-r--r--net/core/scm.c3
-rw-r--r--net/core/skbuff.c356
-rw-r--r--net/core/sock.c20
-rw-r--r--net/core/sysctl_net_core.c21
-rw-r--r--net/core/utils.c3
19 files changed, 1013 insertions, 655 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index fdbc9a81d4c2..df493d68330c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -49,6 +49,7 @@
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/uio.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
@@ -309,16 +310,14 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
EXPORT_SYMBOL(skb_kill_datagram);
/**
- * skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ * skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
* @skb: buffer to copy
* @offset: offset in the buffer to start copying from
- * @to: io vector to copy to
+ * @to: iovec iterator to copy to
* @len: amount of data to copy from buffer to iovec
- *
- * Note: the iovec is modified during the copy.
*/
-int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
- struct iovec *to, int len)
+int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
+ struct iov_iter *to, int len)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
@@ -330,8 +329,8 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
if (copy > 0) {
if (copy > len)
copy = len;
- if (memcpy_toiovec(to, skb->data + offset, copy))
- goto fault;
+ if (copy_to_iter(skb->data + offset, copy, to) != copy)
+ goto short_copy;
if ((len -= copy) == 0)
return 0;
offset += copy;
@@ -346,18 +345,12 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
- int err;
- u8 *vaddr;
- struct page *page = skb_frag_page(frag);
-
if (copy > len)
copy = len;
- vaddr = kmap(page);
- err = memcpy_toiovec(to, vaddr + frag->page_offset +
- offset - start, copy);
- kunmap(page);
- if (err)
- goto fault;
+ if (copy_page_to_iter(skb_frag_page(frag),
+ frag->page_offset + offset -
+ start, copy, to) != copy)
+ goto short_copy;
if (!(len -= copy))
return 0;
offset += copy;
@@ -374,9 +367,8 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
- if (skb_copy_datagram_iovec(frag_iter,
- offset - start,
- to, copy))
+ if (skb_copy_datagram_iter(frag_iter, offset - start,
+ to, copy))
goto fault;
if ((len -= copy) == 0)
return 0;
@@ -387,113 +379,33 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
if (!len)
return 0;
+ /* This is not really a user copy fault, but rather someone
+ * gave us a bogus length on the skb. We should probably
+ * print a warning here as it may indicate a kernel bug.
+ */
+
fault:
return -EFAULT;
-}
-EXPORT_SYMBOL(skb_copy_datagram_iovec);
-/**
- * skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
- * @skb: buffer to copy
- * @offset: offset in the buffer to start copying from
- * @to: io vector to copy to
- * @to_offset: offset in the io vector to start copying to
- * @len: amount of data to copy from buffer to iovec
- *
- * Returns 0 or -EFAULT.
- * Note: the iovec is not modified during the copy.
- */
-int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
- const struct iovec *to, int to_offset,
- int len)
-{
- int start = skb_headlen(skb);
- int i, copy = start - offset;
- struct sk_buff *frag_iter;
+short_copy:
+ if (iov_iter_count(to))
+ goto fault;
- /* Copy header. */
- if (copy > 0) {
- if (copy > len)
- copy = len;
- if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
- goto fault;
- if ((len -= copy) == 0)
- return 0;
- offset += copy;
- to_offset += copy;
- }
-
- /* Copy paged appendix. Hmm... why does this look so complicated? */
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- int end;
- const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
- WARN_ON(start > offset + len);
-
- end = start + skb_frag_size(frag);
- if ((copy = end - offset) > 0) {
- int err;
- u8 *vaddr;
- struct page *page = skb_frag_page(frag);
-
- if (copy > len)
- copy = len;
- vaddr = kmap(page);
- err = memcpy_toiovecend(to, vaddr + frag->page_offset +
- offset - start, to_offset, copy);
- kunmap(page);
- if (err)
- goto fault;
- if (!(len -= copy))
- return 0;
- offset += copy;
- to_offset += copy;
- }
- start = end;
- }
-
- skb_walk_frags(skb, frag_iter) {
- int end;
-
- WARN_ON(start > offset + len);
-
- end = start + frag_iter->len;
- if ((copy = end - offset) > 0) {
- if (copy > len)
- copy = len;
- if (skb_copy_datagram_const_iovec(frag_iter,
- offset - start,
- to, to_offset,
- copy))
- goto fault;
- if ((len -= copy) == 0)
- return 0;
- offset += copy;
- to_offset += copy;
- }
- start = end;
- }
- if (!len)
- return 0;
-
-fault:
- return -EFAULT;
+ return 0;
}
-EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
+EXPORT_SYMBOL(skb_copy_datagram_iter);
/**
- * skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
+ * skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
* @skb: buffer to copy
* @offset: offset in the buffer to start copying to
- * @from: io vector to copy to
- * @from_offset: offset in the io vector to start copying from
+ * @from: the copy source
* @len: amount of data to copy to buffer from iovec
*
* Returns 0 or -EFAULT.
- * Note: the iovec is not modified during the copy.
*/
-int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
- const struct iovec *from, int from_offset,
+int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
+ struct iov_iter *from,
int len)
{
int start = skb_headlen(skb);
@@ -504,13 +416,11 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
if (copy > 0) {
if (copy > len)
copy = len;
- if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
- copy))
+ if (copy_from_iter(skb->data + offset, copy, from) != copy)
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
- from_offset += copy;
}
/* Copy paged appendix. Hmm... why does this look so complicated? */
@@ -522,24 +432,19 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
- int err;
- u8 *vaddr;
- struct page *page = skb_frag_page(frag);
+ size_t copied;
if (copy > len)
copy = len;
- vaddr = kmap(page);
- err = memcpy_fromiovecend(vaddr + frag->page_offset +
- offset - start,
- from, from_offset, copy);
- kunmap(page);
- if (err)
+ copied = copy_page_from_iter(skb_frag_page(frag),
+ frag->page_offset + offset - start,
+ copy, from);
+ if (copied != copy)
goto fault;
if (!(len -= copy))
return 0;
offset += copy;
- from_offset += copy;
}
start = end;
}
@@ -553,16 +458,13 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
- if (skb_copy_datagram_from_iovec(frag_iter,
- offset - start,
- from,
- from_offset,
- copy))
+ if (skb_copy_datagram_from_iter(frag_iter,
+ offset - start,
+ from, copy))
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
- from_offset += copy;
}
start = end;
}
@@ -572,101 +474,82 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
fault:
return -EFAULT;
}
-EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
+EXPORT_SYMBOL(skb_copy_datagram_from_iter);
/**
- * zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec
+ * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
* @skb: buffer to copy
- * @from: io vector to copy from
- * @offset: offset in the io vector to start copying from
- * @count: amount of vectors to copy to buffer from
+ * @from: the source to copy from
*
* The function will first copy up to headlen, and then pin the userspace
* pages and build frags through them.
*
* Returns 0, -EFAULT or -EMSGSIZE.
- * Note: the iovec is not modified during the copy
*/
-int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
- int offset, size_t count)
+int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
- int len = iov_length(from, count) - offset;
+ int len = iov_iter_count(from);
int copy = min_t(int, skb_headlen(skb), len);
- int size;
- int i = 0;
+ int frag = 0;
/* copy up to skb headlen */
- if (skb_copy_datagram_from_iovec(skb, 0, from, offset, copy))
+ if (skb_copy_datagram_from_iter(skb, 0, from, copy))
return -EFAULT;
- if (len == copy)
- return 0;
-
- offset += copy;
- while (count--) {
- struct page *page[MAX_SKB_FRAGS];
- int num_pages;
- unsigned long base;
+ while (iov_iter_count(from)) {
+ struct page *pages[MAX_SKB_FRAGS];
+ size_t start;
+ ssize_t copied;
unsigned long truesize;
+ int n = 0;
- /* Skip over from offset and copied */
- if (offset >= from->iov_len) {
- offset -= from->iov_len;
- ++from;
- continue;
- }
- len = from->iov_len - offset;
- base = (unsigned long)from->iov_base + offset;
- size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
- if (i + size > MAX_SKB_FRAGS)
+ if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE;
- num_pages = get_user_pages_fast(base, size, 0, &page[i]);
- if (num_pages != size) {
- release_pages(&page[i], num_pages, 0);
+
+ copied = iov_iter_get_pages(from, pages, ~0U,
+ MAX_SKB_FRAGS - frag, &start);
+ if (copied < 0)
return -EFAULT;
- }
- truesize = size * PAGE_SIZE;
- skb->data_len += len;
- skb->len += len;
+
+ iov_iter_advance(from, copied);
+
+ truesize = PAGE_ALIGN(copied + start);
+ skb->data_len += copied;
+ skb->len += copied;
skb->truesize += truesize;
atomic_add(truesize, &skb->sk->sk_wmem_alloc);
- while (len) {
- int off = base & ~PAGE_MASK;
- int size = min_t(int, len, PAGE_SIZE - off);
- skb_fill_page_desc(skb, i, page[i], off, size);
- base += size;
- len -= size;
- i++;
+ while (copied) {
+ int size = min_t(int, copied, PAGE_SIZE - start);
+ skb_fill_page_desc(skb, frag++, pages[n], start, size);
+ start = 0;
+ copied -= size;
+ n++;
}
- offset = 0;
- ++from;
}
return 0;
}
-EXPORT_SYMBOL(zerocopy_sg_from_iovec);
+EXPORT_SYMBOL(zerocopy_sg_from_iter);
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
- u8 __user *to, int len,
+ struct iov_iter *to, int len,
__wsum *csump)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
struct sk_buff *frag_iter;
int pos = 0;
+ int n;
/* Copy header. */
if (copy > 0) {
- int err = 0;
if (copy > len)
copy = len;
- *csump = csum_and_copy_to_user(skb->data + offset, to, copy,
- *csump, &err);
- if (err)
+ n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
+ if (n != copy)
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
- to += copy;
pos = copy;
}
@@ -678,26 +561,22 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
- __wsum csum2;
- int err = 0;
- u8 *vaddr;
+ __wsum csum2 = 0;
struct page *page = skb_frag_page(frag);
+ u8 *vaddr = kmap(page);
if (copy > len)
copy = len;
- vaddr = kmap(page);
- csum2 = csum_and_copy_to_user(vaddr +
- frag->page_offset +
- offset - start,
- to, copy, 0, &err);
+ n = csum_and_copy_to_iter(vaddr + frag->page_offset +
+ offset - start, copy,
+ &csum2, to);
kunmap(page);
- if (err)
+ if (n != copy)
goto fault;
*csump = csum_block_add(*csump, csum2, pos);
if (!(len -= copy))
return 0;
offset += copy;
- to += copy;
pos += copy;
}
start = end;
@@ -722,7 +601,6 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
if ((len -= copy) == 0)
return 0;
offset += copy;
- to += copy;
pos += copy;
}
start = end;
@@ -775,20 +653,19 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
EXPORT_SYMBOL(__skb_checksum_complete);
/**
- * skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
+ * skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
* @skb: skbuff
* @hlen: hardware length
- * @iov: io vector
+ * @msg: destination
*
* Caller _must_ check that skb will fit to this iovec.
*
* Returns: 0 - success.
* -EINVAL - checksum failure.
- * -EFAULT - fault during copy. Beware, in this case iovec
- * can be modified!
+ * -EFAULT - fault during copy.
*/
-int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
- int hlen, struct iovec *iov)
+int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
+ int hlen, struct msghdr *msg)
{
__wsum csum;
int chunk = skb->len - hlen;
@@ -796,28 +673,20 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
if (!chunk)
return 0;
- /* Skip filled elements.
- * Pretty silly, look at memcpy_toiovec, though 8)
- */
- while (!iov->iov_len)
- iov++;
-
- if (iov->iov_len < chunk) {
+ if (iov_iter_count(&msg->msg_iter) < chunk) {
if (__skb_checksum_complete(skb))
goto csum_error;
- if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
+ if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
goto fault;
} else {
csum = csum_partial(skb->data, hlen, skb->csum);
- if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
+ if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
chunk, &csum))
goto fault;
if (csum_fold(csum))
goto csum_error;
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
- iov->iov_len -= chunk;
- iov->iov_base += chunk;
}
return 0;
csum_error:
@@ -825,7 +694,7 @@ csum_error:
fault:
return -EFAULT;
}
-EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
/**
* datagram_poll - generic datagram poll
diff --git a/net/core/dev.c b/net/core/dev.c
index 945bbd001359..f411c28d0a66 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -118,6 +118,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
+#include <net/mpls.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
@@ -133,6 +134,7 @@
#include <linux/vmalloc.h>
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
+#include <linux/hrtimer.h>
#include "net-sysfs.h"
@@ -1435,22 +1437,17 @@ EXPORT_SYMBOL(dev_close);
*/
void dev_disable_lro(struct net_device *dev)
{
- /*
- * If we're trying to disable lro on a vlan device
- * use the underlying physical device instead
- */
- if (is_vlan_dev(dev))
- dev = vlan_dev_real_dev(dev);
-
- /* the same for macvlan devices */
- if (netif_is_macvlan(dev))
- dev = macvlan_dev_real_dev(dev);
+ struct net_device *lower_dev;
+ struct list_head *iter;
dev->wanted_features &= ~NETIF_F_LRO;
netdev_update_features(dev);
if (unlikely(dev->features & NETIF_F_LRO))
netdev_WARN(dev, "failed to disable LRO!\n");
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter)
+ dev_disable_lro(lower_dev);
}
EXPORT_SYMBOL(dev_disable_lro);
@@ -2530,7 +2527,7 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb,
netdev_features_t features,
__be16 type)
{
- if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC))
+ if (eth_p_mpls(type))
features &= skb->dev->mpls_features;
return features;
@@ -2647,12 +2644,8 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
netdev_features_t features)
{
if (vlan_tx_tag_present(skb) &&
- !vlan_hw_offload_capable(features, skb->vlan_proto)) {
- skb = __vlan_put_tag(skb, skb->vlan_proto,
- vlan_tx_tag_get(skb));
- if (skb)
- skb->vlan_tci = 0;
- }
+ !vlan_hw_offload_capable(features, skb->vlan_proto))
+ skb = __vlan_hwaccel_push_inside(skb);
return skb;
}
@@ -3304,7 +3297,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
rps_lock(sd);
qlen = skb_queue_len(&sd->input_pkt_queue);
if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
- if (skb_queue_len(&sd->input_pkt_queue)) {
+ if (qlen) {
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
input_queue_tail_incr_save(sd, qtail);
@@ -4179,7 +4172,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
struct sk_buff *skb = napi->skb;
if (!skb) {
- skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
+ skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
napi->skb = skb;
}
return skb;
@@ -4316,20 +4309,28 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
local_irq_enable();
}
+static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+ return sd->rps_ipi_list != NULL;
+#else
+ return false;
+#endif
+}
+
static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
-#ifdef CONFIG_RPS
/* Check if we have pending ipi, its better to send them now,
* not waiting net_rx_action() end.
*/
- if (sd->rps_ipi_list) {
+ if (sd_has_rps_ipi_waiting(sd)) {
local_irq_disable();
net_rps_action_and_irq_enable(sd);
}
-#endif
+
napi->weight = weight_p;
local_irq_disable();
while (1) {
@@ -4356,7 +4357,6 @@ static int process_backlog(struct napi_struct *napi, int quota)
* We can use a plain write instead of clear_bit(),
* and we dont need an smp_mb() memory barrier.
*/
- list_del(&napi->poll_list);
napi->state = 0;
rps_unlock(sd);
@@ -4376,7 +4376,8 @@ static int process_backlog(struct napi_struct *napi, int quota)
* __napi_schedule - schedule for receive
* @n: entry to schedule
*
- * The entry's receive function will be scheduled to run
+ * The entry's receive function will be scheduled to run.
+ * Consider using __napi_schedule_irqoff() if hard irqs are masked.
*/
void __napi_schedule(struct napi_struct *n)
{
@@ -4388,18 +4389,29 @@ void __napi_schedule(struct napi_struct *n)
}
EXPORT_SYMBOL(__napi_schedule);
+/**
+ * __napi_schedule_irqoff - schedule for receive
+ * @n: entry to schedule
+ *
+ * Variant of __napi_schedule() assuming hard irqs are masked
+ */
+void __napi_schedule_irqoff(struct napi_struct *n)
+{
+ ____napi_schedule(this_cpu_ptr(&softnet_data), n);
+}
+EXPORT_SYMBOL(__napi_schedule_irqoff);
+
void __napi_complete(struct napi_struct *n)
{
BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- BUG_ON(n->gro_list);
- list_del(&n->poll_list);
+ list_del_init(&n->poll_list);
smp_mb__before_atomic();
clear_bit(NAPI_STATE_SCHED, &n->state);
}
EXPORT_SYMBOL(__napi_complete);
-void napi_complete(struct napi_struct *n)
+void napi_complete_done(struct napi_struct *n, int work_done)
{
unsigned long flags;
@@ -4410,12 +4422,28 @@ void napi_complete(struct napi_struct *n)
if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
return;
- napi_gro_flush(n, false);
- local_irq_save(flags);
- __napi_complete(n);
- local_irq_restore(flags);
+ if (n->gro_list) {
+ unsigned long timeout = 0;
+
+ if (work_done)
+ timeout = n->dev->gro_flush_timeout;
+
+ if (timeout)
+ hrtimer_start(&n->timer, ns_to_ktime(timeout),
+ HRTIMER_MODE_REL_PINNED);
+ else
+ napi_gro_flush(n, false);
+ }
+ if (likely(list_empty(&n->poll_list))) {
+ WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
+ } else {
+ /* If n->poll_list is not empty, we need to mask irqs */
+ local_irq_save(flags);
+ __napi_complete(n);
+ local_irq_restore(flags);
+ }
}
-EXPORT_SYMBOL(napi_complete);
+EXPORT_SYMBOL(napi_complete_done);
/* must be called under rcu_read_lock(), as we dont take a reference */
struct napi_struct *napi_by_id(unsigned int napi_id)
@@ -4469,10 +4497,23 @@ void napi_hash_del(struct napi_struct *napi)
}
EXPORT_SYMBOL_GPL(napi_hash_del);
+static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
+{
+ struct napi_struct *napi;
+
+ napi = container_of(timer, struct napi_struct, timer);
+ if (napi->gro_list)
+ napi_schedule(napi);
+
+ return HRTIMER_NORESTART;
+}
+
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
INIT_LIST_HEAD(&napi->poll_list);
+ hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+ napi->timer.function = napi_watchdog;
napi->gro_count = 0;
napi->gro_list = NULL;
napi->skb = NULL;
@@ -4491,6 +4532,20 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
}
EXPORT_SYMBOL(netif_napi_add);
+void napi_disable(struct napi_struct *n)
+{
+ might_sleep();
+ set_bit(NAPI_STATE_DISABLE, &n->state);
+
+ while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
+ msleep(1);
+
+ hrtimer_cancel(&n->timer);
+
+ clear_bit(NAPI_STATE_DISABLE, &n->state);
+}
+EXPORT_SYMBOL(napi_disable);
+
void netif_napi_del(struct napi_struct *napi)
{
list_del_init(&napi->dev_list);
@@ -4507,29 +4562,28 @@ static void net_rx_action(struct softirq_action *h)
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
+ LIST_HEAD(list);
+ LIST_HEAD(repoll);
void *have;
local_irq_disable();
+ list_splice_init(&sd->poll_list, &list);
+ local_irq_enable();
- while (!list_empty(&sd->poll_list)) {
+ while (!list_empty(&list)) {
struct napi_struct *n;
int work, weight;
- /* If softirq window is exhuasted then punt.
+ /* If softirq window is exhausted then punt.
* Allow this to run for 2 jiffies since which will allow
* an average latency of 1.5/HZ.
*/
if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
goto softnet_break;
- local_irq_enable();
- /* Even though interrupts have been re-enabled, this
- * access is safe because interrupts can only add new
- * entries to the tail of this list, and only ->poll()
- * calls can remove this head entry from the list.
- */
- n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
+ n = list_first_entry(&list, struct napi_struct, poll_list);
+ list_del_init(&n->poll_list);
have = netpoll_poll_lock(n);
@@ -4551,8 +4605,6 @@ static void net_rx_action(struct softirq_action *h)
budget -= work;
- local_irq_disable();
-
/* Drivers must not modify the NAPI state if they
* consume the entire weight. In such cases this code
* still "owns" the NAPI instance and therefore can
@@ -4560,32 +4612,40 @@ static void net_rx_action(struct softirq_action *h)
*/
if (unlikely(work == weight)) {
if (unlikely(napi_disable_pending(n))) {
- local_irq_enable();
napi_complete(n);
- local_irq_disable();
} else {
if (n->gro_list) {
/* flush too old packets
* If HZ < 1000, flush all packets.
*/
- local_irq_enable();
napi_gro_flush(n, HZ >= 1000);
- local_irq_disable();
}
- list_move_tail(&n->poll_list, &sd->poll_list);
+ list_add_tail(&n->poll_list, &repoll);
}
}
netpoll_poll_unlock(have);
}
+
+ if (!sd_has_rps_ipi_waiting(sd) &&
+ list_empty(&list) &&
+ list_empty(&repoll))
+ return;
out:
+ local_irq_disable();
+
+ list_splice_tail_init(&sd->poll_list, &list);
+ list_splice_tail(&repoll, &list);
+ list_splice(&list, &sd->poll_list);
+ if (!list_empty(&sd->poll_list))
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+
net_rps_action_and_irq_enable(sd);
return;
softnet_break:
sd->time_squeeze++;
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
goto out;
}
@@ -5786,7 +5846,7 @@ EXPORT_SYMBOL(dev_change_carrier);
* Get device physical port ID
*/
int dev_get_phys_port_id(struct net_device *dev,
- struct netdev_phys_port_id *ppid)
+ struct netdev_phys_item_id *ppid)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -5865,6 +5925,8 @@ static void rollback_registered_many(struct list_head *head)
synchronize_net();
list_for_each_entry(dev, head, unreg_list) {
+ struct sk_buff *skb = NULL;
+
/* Shutdown queueing discipline. */
dev_shutdown(dev);
@@ -5874,6 +5936,11 @@ static void rollback_registered_many(struct list_head *head)
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+ if (!dev->rtnl_link_ops ||
+ dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U,
+ GFP_KERNEL);
+
/*
* Flush the unicast and multicast chains
*/
@@ -5883,9 +5950,8 @@ static void rollback_registered_many(struct list_head *head)
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+ if (skb)
+ rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
/* Notifier chain MUST detach us all upper devices. */
WARN_ON(netdev_has_any_upper_dev(dev));
@@ -7200,11 +7266,10 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
*/
struct net *net;
bool unregistering;
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ add_wait_queue(&netdev_unregistering_wq, &wait);
for (;;) {
- prepare_to_wait(&netdev_unregistering_wq, &wait,
- TASK_UNINTERRUPTIBLE);
unregistering = false;
rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
@@ -7216,9 +7281,10 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
if (!unregistering)
break;
__rtnl_unlock();
- schedule();
+
+ wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
- finish_wait(&netdev_unregistering_wq, &wait);
+ remove_wait_queue(&netdev_unregistering_wq, &wait);
}
static void __net_exit default_device_exit_batch(struct list_head *net_list)
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index b6b230600b97..c0548d268e1a 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -278,8 +278,8 @@ int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
EXPORT_SYMBOL(__hw_addr_sync_dev);
/**
- * __hw_addr_unsync_dev - Remove synchonized addresses from device
- * @list: address list to remove syncronized addresses from
+ * __hw_addr_unsync_dev - Remove synchronized addresses from device
+ * @list: address list to remove synchronized addresses from
* @dev: device to sync
* @unsync: function to call if address should be removed
*
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 72e899a3efda..b94b1d293506 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -142,10 +142,12 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
case SIOCGIFHWADDR:
if (!dev->addr_len)
- memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
+ memset(ifr->ifr_hwaddr.sa_data, 0,
+ sizeof(ifr->ifr_hwaddr.sa_data));
else
memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
- min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+ min(sizeof(ifr->ifr_hwaddr.sa_data),
+ (size_t)dev->addr_len));
ifr->ifr_hwaddr.sa_family = dev->type;
return 0;
@@ -265,7 +267,8 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
if (ifr->ifr_hwaddr.sa_family != dev->type)
return -EINVAL;
memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
- min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+ min(sizeof(ifr->ifr_hwaddr.sa_data),
+ (size_t)dev->addr_len));
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
return 0;
diff --git a/net/core/dst.c b/net/core/dst.c
index a028409ee438..e956ce6d1378 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -327,30 +327,6 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
}
EXPORT_SYMBOL(__dst_destroy_metrics_generic);
-/**
- * __skb_dst_set_noref - sets skb dst, without a reference
- * @skb: buffer
- * @dst: dst entry
- * @force: if force is set, use noref version even for DST_NOCACHE entries
- *
- * Sets skb dst, assuming a reference was not taken on dst
- * skb_dst_drop() should not dst_release() this dst
- */