summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c104
-rw-r--r--net/core/dev.c254
-rw-r--r--net/core/dev_ioctl.c20
-rw-r--r--net/core/devlink.c8
-rw-r--r--net/core/dst.c300
-rw-r--r--net/core/fib_rules.c27
-rw-r--r--net/core/filter.c685
-rw-r--r--net/core/flow_dissector.c69
-rw-r--r--net/core/lwt_bpf.c5
-rw-r--r--net/core/lwtunnel.c38
-rw-r--r--net/core/neighbour.c94
-rw-r--r--net/core/net-procfs.c13
-rw-r--r--net/core/net-sysfs.c16
-rw-r--r--net/core/net_namespace.c83
-rw-r--r--net/core/netpoll.c14
-rw-r--r--net/core/pktgen.c58
-rw-r--r--net/core/rtnetlink.c199
-rw-r--r--net/core/secure_seq.c9
-rw-r--r--net/core/skbuff.c193
-rw-r--r--net/core/sock.c115
-rw-r--r--net/core/sysctl_net_core.c2
21 files changed, 1580 insertions, 726 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index db1866f2ffcf..6877c43cc92d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -68,7 +68,7 @@ static inline int connection_based(struct sock *sk)
return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}
-static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
+static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
void *key)
{
unsigned long bits = (unsigned long)key;
@@ -161,6 +161,45 @@ done:
return skb;
}
+struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
+ struct sk_buff_head *queue,
+ unsigned int flags,
+ void (*destructor)(struct sock *sk,
+ struct sk_buff *skb),
+ int *peeked, int *off, int *err,
+ struct sk_buff **last)
+{
+ struct sk_buff *skb;
+ int _off = *off;
+
+ *last = queue->prev;
+ skb_queue_walk(queue, skb) {
+ if (flags & MSG_PEEK) {
+ if (_off >= skb->len && (skb->len || _off ||
+ skb->peeked)) {
+ _off -= skb->len;
+ continue;
+ }
+ if (!skb->len) {
+ skb = skb_set_peeked(skb);
+ if (unlikely(IS_ERR(skb))) {
+ *err = PTR_ERR(skb);
+ return NULL;
+ }
+ }
+ *peeked = 1;
+ refcount_inc(&skb->users);
+ } else {
+ __skb_unlink(skb, queue);
+ if (destructor)
+ destructor(sk, skb);
+ }
+ *off = _off;
+ return skb;
+ }
+ return NULL;
+}
+
/**
* __skb_try_recv_datagram - Receive a datagram skbuff
* @sk: socket
@@ -181,7 +220,7 @@ done:
*
* This function will lock the socket if a skb is returned, so
* the caller needs to unlock the socket in that case (usually by
- * calling skb_free_datagram). Returns NULL with *err set to
+ * calling skb_free_datagram). Returns NULL with @err set to
* -EAGAIN if no data was available or to some other value if an
* error was detected.
*
@@ -222,40 +261,14 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
* Look at current nfs client by the way...
* However, this function was correct in any case. 8)
*/
- int _off = *off;
-
- *last = (struct sk_buff *)queue;
spin_lock_irqsave(&queue->lock, cpu_flags);
- skb_queue_walk(queue, skb) {
- *last = skb;
- if (flags & MSG_PEEK) {
- if (_off >= skb->len && (skb->len || _off ||
- skb->peeked)) {
- _off -= skb->len;
- continue;
- }
- if (!skb->len) {
- skb = skb_set_peeked(skb);
- if (IS_ERR(skb)) {
- error = PTR_ERR(skb);
- spin_unlock_irqrestore(&queue->lock,
- cpu_flags);
- goto no_packet;
- }
- }
- *peeked = 1;
- atomic_inc(&skb->users);
- } else {
- __skb_unlink(skb, queue);
- if (destructor)
- destructor(sk, skb);
- }
- spin_unlock_irqrestore(&queue->lock, cpu_flags);
- *off = _off;
- return skb;
- }
-
+ skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
+ peeked, off, &error, last);
spin_unlock_irqrestore(&queue->lock, cpu_flags);
+ if (error)
+ goto no_packet;
+ if (skb)
+ return skb;
if (!sk_can_busy_loop(sk))
break;
@@ -317,9 +330,7 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
bool slow;
- if (likely(atomic_read(&skb->users) == 1))
- smp_rmb();
- else if (likely(!atomic_dec_and_test(&skb->users))) {
+ if (!skb_unref(skb)) {
sk_peek_offset_bwd(sk, len);
return;
}
@@ -335,8 +346,8 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
}
EXPORT_SYMBOL(__skb_free_datagram_locked);
-int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
- unsigned int flags,
+int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
+ struct sk_buff *skb, unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb))
{
@@ -344,15 +355,15 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
if (flags & MSG_PEEK) {
err = -ENOENT;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (skb == skb_peek(&sk->sk_receive_queue)) {
- __skb_unlink(skb, &sk->sk_receive_queue);
- atomic_dec(&skb->users);
+ spin_lock_bh(&sk_queue->lock);
+ if (skb == skb_peek(sk_queue)) {
+ __skb_unlink(skb, sk_queue);
+ refcount_dec(&skb->users);
if (destructor)
destructor(sk, skb);
err = 0;
}
- spin_unlock_bh(&sk->sk_receive_queue.lock);
+ spin_unlock_bh(&sk_queue->lock);
}
atomic_inc(&sk->sk_drops);
@@ -383,7 +394,8 @@ EXPORT_SYMBOL(__sk_queue_drop_skb);
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
- int err = __sk_queue_drop_skb(sk, skb, flags, NULL);
+ int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
+ NULL);
kfree_skb(skb);
sk_mem_reclaim_partial(sk);
@@ -602,7 +614,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
skb->data_len += copied;
skb->len += copied;
skb->truesize += truesize;
- atomic_add(truesize, &skb->sk->sk_wmem_alloc);
+ refcount_add(truesize, &skb->sk->sk_wmem_alloc);
while (copied) {
int size = min_t(int, copied, PAGE_SIZE - start);
skb_fill_page_desc(skb, frag++, pages[n], start, size);
diff --git a/net/core/dev.c b/net/core/dev.c
index 96cf83da0d66..7098fba52be1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -105,6 +105,7 @@
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
@@ -142,6 +143,7 @@
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include <linux/crash_dump.h>
+#include <linux/sctp.h>
#include "net-sysfs.h"
@@ -161,6 +163,7 @@ static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
struct net_device *dev,
struct netdev_notifier_info *info);
+static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -865,6 +868,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
EXPORT_SYMBOL(dev_get_by_index);
/**
+ * dev_get_by_napi_id - find a device by napi_id
+ * @napi_id: ID of the NAPI struct
+ *
+ * Search for an interface by NAPI ID. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not had
+ * its reference counter increased so the caller must be careful
+ * about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_napi_id(unsigned int napi_id)
+{
+ struct napi_struct *napi;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ if (napi_id < MIN_NAPI_ID)
+ return NULL;
+
+ napi = napi_by_id(napi_id);
+
+ return napi ? napi->dev : NULL;
+}
+EXPORT_SYMBOL(dev_get_by_napi_id);
+
+/**
* netdev_get_name - get a netdevice name, knowing its ifindex.
* @net: network namespace
* @name: a pointer to the buffer where the name will be stored.
@@ -1253,8 +1281,9 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
if (!new_ifalias)
return -ENOMEM;
dev->ifalias = new_ifalias;
+ memcpy(dev->ifalias, alias, len);
+ dev->ifalias[len] = 0;
- strlcpy(dev->ifalias, alias, len+1);
return len;
}
@@ -1833,7 +1862,7 @@ static inline int deliver_skb(struct sk_buff *skb,
{
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
return -ENOMEM;
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
@@ -2455,10 +2484,10 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
if (unlikely(!skb))
return;
- if (likely(atomic_read(&skb->users) == 1)) {
+ if (likely(refcount_read(&skb->users) == 1)) {
smp_rmb();
- atomic_set(&skb->users, 0);
- } else if (likely(!atomic_dec_and_test(&skb->users))) {
+ refcount_set(&skb->users, 0);
+ } else if (likely(!refcount_dec_and_test(&skb->users))) {
return;
}
get_kfree_skb_cb(skb)->reason = reason;
@@ -2611,6 +2640,47 @@ out:
}
EXPORT_SYMBOL(skb_checksum_help);
+int skb_crc32c_csum_help(struct sk_buff *skb)
+{
+ __le32 crc32c_csum;
+ int ret = 0, offset, start;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ goto out;
+
+ if (unlikely(skb_is_gso(skb)))
+ goto out;
+
+ /* Before computing a checksum, we should make sure no frag could
+ * be modified by an external entity : checksum could be wrong.
+ */
+ if (unlikely(skb_has_shared_frag(skb))) {
+ ret = __skb_linearize(skb);
+ if (ret)
+ goto out;
+ }
+ start = skb_checksum_start_offset(skb);
+ offset = start + offsetof(struct sctphdr, checksum);
+ if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (skb_cloned(skb) &&
+ !skb_clone_writable(skb, offset + sizeof(__le32))) {
+ ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (ret)
+ goto out;
+ }
+ crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
+ skb->len - start, ~(__u32)0,
+ crc32c_csum_stub));
+ *(__le32 *)(skb->data + offset) = crc32c_csum;
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_not_inet = 0;
+out:
+ return ret;
+}
+
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
__be16 type = skb->protocol;
@@ -2953,6 +3023,17 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
return skb;
}
+int skb_csum_hwoffload_help(struct sk_buff *skb,
+ const netdev_features_t features)
+{
+ if (unlikely(skb->csum_not_inet))
+ return !!(features & NETIF_F_SCTP_CRC) ? 0 :
+ skb_crc32c_csum_help(skb);
+
+ return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb);
+}
+EXPORT_SYMBOL(skb_csum_hwoffload_help);
+
static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
{
netdev_features_t features;
@@ -2991,8 +3072,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
else
skb_set_transport_header(skb,
skb_checksum_start_offset(skb));
- if (!(features & NETIF_F_CSUM_MASK) &&
- skb_checksum_help(skb))
+ if (skb_csum_hwoffload_help(skb, features))
goto out_kfree_skb;
}
}
@@ -3178,7 +3258,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
qdisc_bstats_cpu_update(cl->q, skb);
- switch (tc_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, cl, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -3190,6 +3270,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
*ret = NET_XMIT_SUCCESS;
consume_skb(skb);
return NULL;
@@ -3874,7 +3955,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
clist = clist->next;
- WARN_ON(atomic_read(&skb->users));
+ WARN_ON(refcount_read(&skb->users));
if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
trace_consume_skb(skb);
else
@@ -3948,7 +4029,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
skb->tc_at_ingress = 1;
qdisc_bstats_cpu_update(cl->q, skb);
- switch (tc_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, cl, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -3959,6 +4040,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
consume_skb(skb);
return NULL;
case TC_ACT_REDIRECT:
@@ -4260,13 +4342,12 @@ static struct static_key generic_xdp_needed __read_mostly;
static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
{
+ struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
struct bpf_prog *new = xdp->prog;
int ret = 0;
switch (xdp->command) {
- case XDP_SETUP_PROG: {
- struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
-
+ case XDP_SETUP_PROG:
rcu_assign_pointer(dev->xdp_prog, new);
if (old)
bpf_prog_put(old);
@@ -4278,10 +4359,10 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
dev_disable_lro(dev);
}
break;
- }
case XDP_QUERY_PROG:
- xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog);
+ xdp->prog_attached = !!old;
+ xdp->prog_id = old ? old->aux->id : 0;
break;
default:
@@ -4636,9 +4717,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (netif_elide_gro(skb->dev))
goto normal;
- if (skb->csum_bad)
- goto normal;
-
gro_list_prepare(napi, skb);
rcu_read_lock();
@@ -4766,6 +4844,13 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_complete_by_type);
+static void napi_skb_free_stolen_head(struct sk_buff *skb)
+{
+ skb_dst_drop(skb);
+ secpath_reset(skb);
+ kmem_cache_free(skbuff_head_cache, skb);
+}
+
static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
switch (ret) {
@@ -4779,13 +4864,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
break;
case GRO_MERGED_FREE:
- if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
- skb_dst_drop(skb);
- secpath_reset(skb);
- kmem_cache_free(skbuff_head_cache, skb);
- } else {
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ napi_skb_free_stolen_head(skb);
+ else
__kfree_skb(skb);
- }
break;
case GRO_HELD:
@@ -4857,10 +4939,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
break;
case GRO_DROP:
- case GRO_MERGED_FREE:
napi_reuse_skb(napi, skb);
break;
+ case GRO_MERGED_FREE:
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ napi_skb_free_stolen_head(skb);
+ else
+ napi_reuse_skb(napi, skb);
+ break;
+
case GRO_MERGED:
case GRO_CONSUMED:
break;
@@ -4948,6 +5036,19 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
}
EXPORT_SYMBOL(__skb_gro_checksum_complete);
+static void net_rps_send_ipi(struct softnet_data *remsd)
+{
+#ifdef CONFIG_RPS
+ while (remsd) {
+ struct softnet_data *next = remsd->rps_ipi_next;
+
+ if (cpu_online(remsd->cpu))
+ smp_call_function_single_async(remsd->cpu, &remsd->csd);
+ remsd = next;
+ }
+#endif
+}
+
/*
* net_rps_action_and_irq_enable sends any pending IPI's for rps.
* Note: called with local irq disabled, but exits with local irq enabled.
@@ -4963,14 +5064,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
local_irq_enable();
/* Send pending IPI's to kick RPS processing on remote cpus. */
- while (remsd) {
- struct softnet_data *next = remsd->rps_ipi_next;
-
- if (cpu_online(remsd->cpu))
- smp_call_function_single_async(remsd->cpu,
- &remsd->csd);
- remsd = next;
- }
+ net_rps_send_ipi(remsd);
} else
#endif
local_irq_enable();
@@ -5199,8 +5293,6 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
if (rc == BUSY_POLL_BUDGET)
__napi_schedule(napi);
local_bh_enable();
- if (local_softirq_pending())
- do_softirq();
}
void napi_busy_loop(unsigned int napi_id,
@@ -6852,6 +6944,39 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down);
+u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id)
+{
+ struct netdev_xdp xdp;
+
+ memset(&xdp, 0, sizeof(xdp));
+ xdp.command = XDP_QUERY_PROG;
+
+ /* Query must always succeed. */
+ WARN_ON(xdp_op(dev, &xdp) < 0);
+ if (prog_id)
+ *prog_id = xdp.prog_id;
+
+ return xdp.prog_attached;
+}
+
+static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+ struct netlink_ext_ack *extack, u32 flags,
+ struct bpf_prog *prog)
+{
+ struct netdev_xdp xdp;
+
+ memset(&xdp, 0, sizeof(xdp));
+ if (flags & XDP_FLAGS_HW_MODE)
+ xdp.command = XDP_SETUP_PROG_HW;
+ else
+ xdp.command = XDP_SETUP_PROG;
+ xdp.extack = extack;
+ xdp.flags = flags;
+ xdp.prog = prog;
+
+ return xdp_op(dev, &xdp);
+}
+
/**
* dev_change_xdp_fd - set or clear a bpf program for a device rx path
* @dev: device
@@ -6864,41 +6989,34 @@ EXPORT_SYMBOL(dev_change_proto_down);
int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
int fd, u32 flags)
{
- int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp);
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
- struct netdev_xdp xdp;
+ xdp_op_t xdp_op, xdp_chk;
int err;
ASSERT_RTNL();
- xdp_op = ops->ndo_xdp;
+ xdp_op = xdp_chk = ops->ndo_xdp;
+ if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
+ return -EOPNOTSUPP;
if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
xdp_op = generic_xdp_install;
+ if (xdp_op == xdp_chk)
+ xdp_chk = generic_xdp_install;
if (fd >= 0) {
- if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
-
- err = xdp_op(dev, &xdp);
- if (err < 0)
- return err;
- if (xdp.prog_attached)
- return -EBUSY;
- }
+ if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL))
+ return -EEXIST;
+ if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
+ __dev_xdp_attached(dev, xdp_op, NULL))
+ return -EBUSY;
prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_SETUP_PROG;
- xdp.extack = extack;
- xdp.prog = prog;
-
- err = xdp_op(dev, &xdp);
+ err = dev_xdp_install(dev, xdp_op, extack, flags, prog);
if (err < 0 && prog)
bpf_prog_put(prog);
@@ -6989,7 +7107,7 @@ static void rollback_registered_many(struct list_head *head)
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U,
+ skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
GFP_KERNEL);
/*
@@ -7482,6 +7600,8 @@ out:
err_uninit:
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
+ if (dev->priv_destructor)
+ dev->priv_destructor(dev);
goto out;
}
EXPORT_SYMBOL(register_netdevice);
@@ -7689,8 +7809,10 @@ void netdev_run_todo(void)
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
WARN_ON(dev->dn_ptr);
- if (dev->destructor)
- dev->destructor(dev);
+ if (dev->priv_destructor)
+ dev->priv_destructor(dev);
+ if (dev->needs_free_netdev)
+ free_netdev(dev);
/* Report a network device has been unregistered */
rtnl_lock();
@@ -7713,7 +7835,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
{
#if BITS_PER_LONG == 64
BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
- memcpy(stats64, netdev_stats, sizeof(*stats64));
+ memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
/* zero out counters that only exist in rtnl_link_stats64 */
memset((char *)stats64 + sizeof(*netdev_stats), 0,
sizeof(*stats64) - sizeof(*netdev_stats));
@@ -7755,9 +7877,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
} else {
netdev_stats_to_stats64(storage, &dev->stats);
}
- storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
- storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
- storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
+ storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
+ storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
+ storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
@@ -8173,7 +8295,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
struct sk_buff **list_skb;
struct sk_buff *skb;
unsigned int cpu;
- struct softnet_data *sd, *oldsd;
+ struct softnet_data *sd, *oldsd, *remsd = NULL;
local_irq_disable();
cpu = smp_processor_id();
@@ -8214,6 +8336,13 @@ static int dev_cpu_dead(unsigned int oldcpu)
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
+#ifdef CONFIG_RPS
+ remsd = oldsd->rps_ipi_list;
+ oldsd->rps_ipi_list = NULL;
+#endif
+ /* send out pending IPI's on offline CPU */
+ net_rps_send_ipi(remsd);
+
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
netif_rx_ni(skb);
@@ -8563,7 +8692,6 @@ static int __init net_dev_init(void)
rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead",
NULL, dev_cpu_dead);
WARN_ON(rc < 0);
- dst_subsys_init();
rc = 0;
out:
return rc;
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d293506..82fd4c9c4a1b 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -225,6 +225,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_NTP_ALL:
rx_filter_valid = 1;
break;
}
@@ -410,6 +411,22 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (cmd == SIOCGIFNAME)
return dev_ifname(net, (struct ifreq __user *)arg);
+ /*
+ * Take care of Wireless Extensions. Unfortunately struct iwreq
+ * isn't a proper subset of struct ifreq (it's 8 byte shorter)
+ * so we need to treat it specially, otherwise applications may
+ * fault if the struct they're passing happens to land at the
+ * end of a mapped page.
+ */
+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+ struct iwreq iwr;
+
+ if (copy_from_user(&iwr, arg, sizeof(iwr)))
+ return -EFAULT;
+
+ return wext_handle_ioctl(net, &iwr, cmd, arg);
+ }
+
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
@@ -559,9 +576,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
ret = -EFAULT;
return ret;
}
- /* Take care of Wireless Extensions */
- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
- return wext_handle_ioctl(net, &ifr, cmd, arg);
return -ENOTTY;
}
}
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b0b87a292e7c..a0adfc31a3fe 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1680,8 +1680,10 @@ start_again:
hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
&devlink_nl_family, NLM_F_MULTI, cmd);
- if (!hdr)
+ if (!hdr) {
+ nlmsg_free(skb);
return -EMSGSIZE;
+ }
if (devlink_nl_put_handle(skb, devlink))
goto nla_put_failure;
@@ -2098,8 +2100,10 @@ start_again:
hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
&devlink_nl_family, NLM_F_MULTI, cmd);
- if (!hdr)
+ if (!hdr) {
+ nlmsg_free(skb);
return -EMSGSIZE;
+ }
if (devlink_nl_put_handle(skb, devlink))
goto nla_put_failure;
diff --git a/net/core/dst.c b/net/core/dst.c
index 960e503b5a52..00aa972ad1a1 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -42,108 +42,6 @@
* to dirty as few cache lines as possible in __dst_free().
* As this is not a very strong hint, we dont force an alignment on SMP.
*/
-static struct {
- spinlock_t lock;
- struct dst_entry *list;
- unsigned long timer_inc;
- unsigned long timer_expires;
-} dst_garbage = {
- .lock = __SPIN_LOCK_UNLOCKED(dst_garbage.lock),
- .timer_inc = DST_GC_MAX,
-};
-static void dst_gc_task(struct work_struct *work);
-static void ___dst_free(struct dst_entry *dst);
-
-static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task);
-
-static DEFINE_MUTEX(dst_gc_mutex);
-/*
- * long lived entries are maintained in this list, guarded by dst_gc_mutex
- */
-static struct dst_entry *dst_busy_list;
-
-static void dst_gc_task(struct work_struct *work)
-{
- int delayed = 0;
- int work_performed = 0;
- unsigned long expires = ~0L;
- struct dst_entry *dst, *next, head;
- struct dst_entry *last = &head;
-
- mutex_lock(&dst_gc_mutex);
- next = dst_busy_list;
-
-loop:
- while ((dst = next) != NULL) {
- next = dst->next;
- prefetch(&next->next);
- cond_resched();
- if (likely(atomic_read(&dst->__refcnt))) {
- last->next = dst;
- last = dst;
- delayed++;
- continue;
- }
- work_performed++;
-
- dst = dst_destroy(dst);
- if (dst) {
- /* NOHASH and still referenced. Unless it is already
- * on gc list, invalidate it and add to gc list.
- *
- * Note: this is temporary. Actually, NOHASH dst's
- * must be obsoleted when parent is obsoleted.
- * But we do not have state "obsoleted, but
- * referenced by parent", so it is right.
- */
- if (dst->obsolete > 0)
- continue;
-
- ___dst_free(dst);
- dst->next = next;
- next = dst;
- }
- }
-
- spin_lock_bh(&dst_garbage.lock);
- next = dst_garbage.list;
- if (next) {
- dst_garbage.list = NULL;
- spin_unlock_bh(&dst_garbage.lock);
- goto loop;
- }
- last->next = NULL;
- dst_busy_list = head.next;
- if (!dst_busy_list)
- dst_garbage.timer_inc = DST_GC_MAX;
- else {
- /*
- * if we freed less than 1/10 of delayed entries,
- * we can sleep longer.
- */
- if (work_performed <= delayed/10) {
- dst_garbage.timer_expires += dst_garbage.timer_inc;
- if (dst_garbage.timer_expires > DST_GC_MAX)
- dst_garbage.timer_expires = DST_GC_MAX;
- dst_garbage.timer_inc += DST_GC_INC;
- } else {
- dst_garbage.timer_inc = DST_GC_INC;
- dst_garbage.timer_expires = DST_GC_MIN;
- }
- expires = dst_garbage.timer_expires;
- /*
- * if the next desired timer is more than 4 seconds in the
- * future then round the timer to whole seconds
- */
- if (expires > 4*HZ)
- expires = round_jiffies_relative(expires);
- schedule_delayed_work(&dst_gc_work, expires);
- }
-
- spin_unlock_bh(&dst_garbage.lock);
- mutex_unlock(&dst_gc_mutex);
-}
-
int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
@@ -151,13 +49,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(dst_discard_out);
-const u32 dst_default_metrics[RTAX_MAX + 1] = {
+const struct dst_metrics dst_default_metrics = {
/* This initializer is needed to force linker to place this variable
* into const section. Otherwise it might end into bss section.
* We really want to avoid false sharing on this variable, and catch
* any writes on it.
*/
- [RTAX_MAX] = 0xdeadbeef,
+ .refcnt = ATOMIC_INIT(1),
};
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
@@ -169,7 +67,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
if (dev)
dev_hold(dev);
dst->ops = ops;
- dst_init_metrics(dst, dst_default_metrics, true);
+ dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
dst->path = dst;
dst->from = NULL;
@@ -216,41 +114,12 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
}
EXPORT_SYMBOL(dst_alloc);
-static void ___dst_free(struct dst_entry *dst)
-{
- /* The first case (dev==NULL) is required, when
- protocol module is unloaded.
- */
- if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
- dst->input = dst_discard;
- dst->output = dst_discard_out;
- }
- dst->obsolete = DST_OBSOLETE_DEAD;
-}
-
-void __dst_free(struct dst_entry *dst)
-{
- spin_lock_bh(&dst_garbage.lock);
- ___dst_free(dst);
- dst->next = dst_garbage.list;
- dst_garbage.list = dst;
- if (dst_garbage.timer_inc > DST_GC_INC) {
- dst_garbage.timer_inc = DST_GC_INC;
- dst_garbage.timer_expires = DST_GC_MIN;
- mod_delayed_work(system_wq, &dst_gc_work,
- dst_garbage.timer_expires);
- }
- spin_unlock_bh(&dst_garbage.lock);
-}
-EXPORT_SYMBOL(__dst_free);
-
struct dst_entry *dst_destroy(struct dst_entry * dst)
{
struct dst_entry *child;
smp_rmb();
-again:
child = dst->child;
if (!(dst->flags & DST_NOCOUNT))
@@ -269,20 +138,8 @@ again:
kmem_cache_free(dst->ops->kmem_cachep, dst);
dst = child;
- if (dst) {
- int nohash = dst->flags & DST_NOHASH;
-
- if (atomic_dec_and_test(&dst->__refcnt)) {
- /* We were real parent of this dst, so kill child. */
- if (nohash)
- goto again;
- } else {
- /* Child is still referenced, return it for freeing. */
- if (nohash)
-