summary refs log tree commit diff stats
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- net/core/dev.c 202
1 files changed, 160 insertions, 42 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 522288177bbd..10684833f864 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -398,6 +398,74 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
+#ifdef CONFIG_LOCKDEP
+/*
+ * register_netdevice() inits txq->_xmit_lock and sets lockdep class
+ * according to dev->type
+ *
+ * netdev_lock_type[] lists the ARPHRD_* device types that get a lock class
+ * of their own, and netdev_lock_name[] holds the class name for the type at
+ * the same index, so the two tables must keep the same order and length.
+ * netdev_xmit_lock_key[] provides one lock_class_key per netdev_lock_type[]
+ * entry.
+ *
+ * The final entry (ARPHRD_NONE / "_xmit_NONE") doubles as the fallback that
+ * netdev_lock_pos() returns for any type missing from the table, so it has
+ * to stay last.
+ */
+static const unsigned short netdev_lock_type[] = {
+ ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
+ ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
+ ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
+ ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
+ ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
+ ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
+ ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
+ ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
+ ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
+ ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
+ ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
+ ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
+ ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
+ ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
+ ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
+
+static const char *const netdev_lock_name[] = {
+ "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
+ "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
+ "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
+ "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
+ "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
+ "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
+ "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
+ "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
+ "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
+ "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
+ "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
+ "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
+ "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
+ "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
+ "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
+
+static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
+
+/*
+ * Map an ARPHRD_* device type to its index in netdev_lock_type[].
+ * A type that is not listed maps to the last entry of the table.
+ */
+static inline unsigned short netdev_lock_pos(unsigned short dev_type)
+{
+	const int fallback = ARRAY_SIZE(netdev_lock_type) - 1;
+	int pos = 0;
+
+	/* stop on the first match, or on the default (last) slot */
+	while (pos < fallback && netdev_lock_type[pos] != dev_type)
+		pos++;
+
+	return pos;
+}
+
+/*
+ * Give @lock the lockdep class key and class name that netdev_lock_pos()
+ * selects for @dev_type.
+ */
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+						 unsigned short dev_type)
+{
+	const unsigned short idx = netdev_lock_pos(dev_type);
+
+	/* key and name live at the same index of their respective tables */
+	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[idx],
+				   netdev_lock_name[idx]);
+}
+#else
+/* Without CONFIG_LOCKDEP there is no lock class to assign; do nothing. */
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+ unsigned short dev_type)
+{
+}
+#endif
+
/*******************************************************************************
*
* Protocol management and registration routines
@@ -4549,6 +4617,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
xdp->data_meta = xdp->data;
xdp->data_end = xdp->data + hlen;
xdp->data_hard_start = skb->data - skb_headroom(skb);
+
+ /* SKB "head" area always has tailroom for skb_shared_info */
+ xdp->frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start;
+ xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
orig_data_end = xdp->data_end;
orig_data = xdp->data;
eth = (struct ethhdr *)xdp->data;
@@ -4572,14 +4645,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
skb_reset_network_header(skb);
}
- /* check if bpf_xdp_adjust_tail was used. it can only "shrink"
- * pckt.
- */
- off = orig_data_end - xdp->data_end;
+ /* check if bpf_xdp_adjust_tail was used */
+ off = xdp->data_end - orig_data_end;
if (off != 0) {
skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
- skb->len -= off;
-
+ skb->len += off; /* positive on grow, negative on shrink */
}
/* check if XDP changed eth hdr such SKB needs update */
@@ -4988,11 +5058,12 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
return 0;
}
-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
+static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
struct packet_type **ppt_prev)
{
struct packet_type *ptype, *pt_prev;
rx_handler_func_t *rx_handler;
+ struct sk_buff *skb = *pskb;
struct net_device *orig_dev;
bool deliver_exact = false;
int ret = NET_RX_DROP;
@@ -5023,8 +5094,10 @@ another_round:
ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
preempt_enable();
- if (ret2 != XDP_PASS)
- return NET_RX_DROP;
+ if (ret2 != XDP_PASS) {
+ ret = NET_RX_DROP;
+ goto out;
+ }
skb_reset_mac_len(skb);
}
@@ -5174,6 +5247,13 @@ drop:
}
out:
+ /* The invariant here is that if *ppt_prev is not NULL
+ * then skb should also be non-NULL.
+ *
+ * Apparently *ppt_prev assignment above holds this invariant due to
+ * skb dereferencing near it.
+ */
+ *pskb = skb;
return ret;
}
@@ -5183,7 +5263,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
struct packet_type *pt_prev = NULL;
int ret;
- ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+ ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
if (pt_prev)
ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
skb->dev, pt_prev, orig_dev);
@@ -5261,7 +5341,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
struct packet_type *pt_prev = NULL;
skb_list_del_init(skb);
- __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+ __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
if (!pt_prev)
continue;
if (pt_curr != pt_prev || od_curr != orig_dev) {
@@ -5340,6 +5420,18 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
struct bpf_prog *new = xdp->prog;
int ret = 0;
+ if (new) {
+ u32 i;
+
+ /* generic XDP does not work with DEVMAPs that can
+ * have a bpf_prog installed on an entry
+ */
+ for (i = 0; i < new->aux->used_map_cnt; i++) {
+ if (dev_map_can_have_prog(new->aux->used_maps[i]))
+ return -EINVAL;
+ }
+ }
+
switch (xdp->command) {
case XDP_SETUP_PROG:
rcu_assign_pointer(dev->xdp_prog, new);
@@ -6227,7 +6319,8 @@ EXPORT_SYMBOL(__napi_schedule_irqoff);
bool napi_complete_done(struct napi_struct *n, int work_done)
{
- unsigned long flags, val, new;
+ unsigned long flags, val, new, timeout = 0;
+ bool ret = true;
/*
* 1) Don't let napi dequeue from the cpu poll list
@@ -6239,20 +6332,23 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
NAPIF_STATE_IN_BUSY_POLL)))
return false;
+ if (work_done) {
+ if (n->gro_bitmask)
+ timeout = READ_ONCE(n->dev->gro_flush_timeout);
+ n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
+ }
+ if (n->defer_hard_irqs_count > 0) {
+ n->defer_hard_irqs_count--;
+ timeout = READ_ONCE(n->dev->gro_flush_timeout);
+ if (timeout)
+ ret = false;
+ }
if (n->gro_bitmask) {
- unsigned long timeout = 0;
-
- if (work_done)
- timeout = n->dev->gro_flush_timeout;
-
/* When the NAPI instance uses a timeout and keeps postponing
* it, we need to bound somehow the time packets are kept in
* the GRO layer
*/
napi_gro_flush(n, !!timeout);
- if (timeout)
- hrtimer_start(&n->timer, ns_to_ktime(timeout),
- HRTIMER_MODE_REL_PINNED);
}
gro_normal_list(n);
@@ -6284,7 +6380,10 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
return false;
}
- return true;
+ if (timeout)
+ hrtimer_start(&n->timer, ns_to_ktime(timeout),
+ HRTIMER_MODE_REL_PINNED);
+ return ret;
}
EXPORT_SYMBOL(napi_complete_done);
@@ -6464,7 +6563,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
/* Note : we use a relaxed variant of napi_schedule_prep() not setting
* NAPI_STATE_MISSED, since we do not react to a device IRQ.
*/
- if (napi->gro_bitmask && !napi_disable_pending(napi) &&
+ if (!napi_disable_pending(napi) &&
!test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
__napi_schedule_irqoff(napi);
@@ -7786,6 +7885,28 @@ void netdev_bonding_info_change(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_bonding_info_change);
+/**
+ * netdev_get_xmit_slave - Get the xmit slave of master device
+ * @dev: The master device; its ndo_get_xmit_slave op performs the lookup
+ * @skb: The packet
+ * @all_slaves: assume all the slaves are active
+ *
+ * The reference counters are not incremented so the caller must be
+ * careful with locks. The caller must hold RCU lock.
+ * %NULL is returned if no slave is found, or if @dev does not implement
+ * ndo_get_xmit_slave.
+ */
+
+struct net_device *netdev_get_xmit_slave(struct net_device *dev,
+ struct sk_buff *skb,
+ bool all_slaves)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_get_xmit_slave)
+ return NULL;
+ return ops->ndo_get_xmit_slave(dev, skb, all_slaves);
+}
+EXPORT_SYMBOL(netdev_get_xmit_slave);
+
static void netdev_adjacent_add_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
@@ -8726,6 +8847,12 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
return -EINVAL;
}
+ if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
+ NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
+
/* prog->aux->id may be 0 for orphaned device-bound progs */
if (prog->aux->id && prog->aux->id == prog_id) {
bpf_prog_put(prog);
@@ -8907,11 +9034,13 @@ static void netdev_sync_lower_features(struct net_device *upper,
netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
&feature, lower->name);
lower->wanted_features &= ~feature;
- netdev_update_features(lower);
+ __netdev_update_features(lower);
if (unlikely(lower->features & feature))
netdev_WARN(upper, "failed to disable %pNF on %s!\n",
&feature, lower->name);
+ else
+ netdev_features_change(lower);
}
}
}
@@ -9136,6 +9265,11 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
else
netif_dormant_off(dev);
+ if (rootdev->operstate == IF_OPER_TESTING)
+ netif_testing_on(dev);
+ else
+ netif_testing_off(dev);
+
if (netif_carrier_ok(rootdev))
netif_carrier_on(dev);
else
@@ -9196,7 +9330,7 @@ static void netdev_init_one_queue(struct net_device *dev,
{
/* Initialize queue lock */
spin_lock_init(&queue->_xmit_lock);
- lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key);
+ netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
queue->xmit_lock_owner = -1;
netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
queue->dev = dev;
@@ -9243,22 +9377,6 @@ void netif_tx_stop_all_queues(struct net_device *dev)
}
EXPORT_SYMBOL(netif_tx_stop_all_queues);
-static void netdev_register_lockdep_key(struct net_device *dev)
-{
- lockdep_register_key(&dev->qdisc_tx_busylock_key);
- lockdep_register_key(&dev->qdisc_running_key);
- lockdep_register_key(&dev->qdisc_xmit_lock_key);
- lockdep_register_key(&dev->addr_list_lock_key);
-}
-
-static void netdev_unregister_lockdep_key(struct net_device *dev)
-{
- lockdep_unregister_key(&dev->qdisc_tx_busylock_key);
- lockdep_unregister_key(&dev->qdisc_running_key);
- lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
- lockdep_unregister_key(&dev->addr_list_lock_key);
-}
-
void netdev_update_lockdep_key(struct net_device *dev)
{
lockdep_unregister_key(&dev->addr_list_lock_key);
@@ -9825,7 +9943,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
- netdev_register_lockdep_key(dev);
+ lockdep_register_key(&dev->addr_list_lock_key);
dev->gso_max_size = GSO_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
@@ -9914,7 +10032,7 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;
- netdev_unregister_lockdep_key(dev);
+ lockdep_unregister_key(&dev->addr_list_lock_key);
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {