summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c295
-rw-r--r--net/core/dev_ioctl.c7
-rw-r--r--net/core/ethtool.c81
-rw-r--r--net/core/filter.c8
-rw-r--r--net/core/flow_dissector.c115
-rw-r--r--net/core/netpoll.c7
-rw-r--r--net/core/pktgen.c28
-rw-r--r--net/core/rtnetlink.c66
-rw-r--r--net/core/secure_seq.c6
-rw-r--r--net/core/skbuff.c99
-rw-r--r--net/core/sock.c32
-rw-r--r--net/core/timestamping.c43
12 files changed, 472 insertions, 315 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index ab9a16530c36..3c6a967e5830 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2485,52 +2485,6 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
return 0;
}
-struct dev_gso_cb {
- void (*destructor)(struct sk_buff *skb);
-};
-
-#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
-
-static void dev_gso_skb_destructor(struct sk_buff *skb)
-{
- struct dev_gso_cb *cb;
-
- kfree_skb_list(skb->next);
- skb->next = NULL;
-
- cb = DEV_GSO_CB(skb);
- if (cb->destructor)
- cb->destructor(skb);
-}
-
-/**
- * dev_gso_segment - Perform emulated hardware segmentation on skb.
- * @skb: buffer to segment
- * @features: device features as applicable to this skb
- *
- * This function segments the given skb and stores the list of segments
- * in skb->next.
- */
-static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
-{
- struct sk_buff *segs;
-
- segs = skb_gso_segment(skb, features);
-
- /* Verifying header integrity only. */
- if (!segs)
- return 0;
-
- if (IS_ERR(segs))
- return PTR_ERR(segs);
-
- skb->next = segs;
- DEV_GSO_CB(skb)->destructor = skb->destructor;
- skb->destructor = dev_gso_skb_destructor;
-
- return 0;
-}
-
/* If MPLS offload request, verify we are testing hardware MPLS features
* instead of standard features for the netdev.
*/
@@ -2605,119 +2559,125 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
}
EXPORT_SYMBOL(netif_skb_features);
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
- struct netdev_queue *txq)
+static int xmit_one(struct sk_buff *skb, struct net_device *dev,
+ struct netdev_queue *txq, bool more)
{
- const struct net_device_ops *ops = dev->netdev_ops;
+ unsigned int len;
+ int rc;
+
+ if (!list_empty(&ptype_all))
+ dev_queue_xmit_nit(skb, dev);
+
+ len = skb->len;
+ trace_net_dev_start_xmit(skb, dev);
+ rc = netdev_start_xmit(skb, dev, txq, more);
+ trace_net_dev_xmit(skb, rc, dev, len);
+
+ return rc;
+}
+
+struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
+ struct netdev_queue *txq, int *ret)
+{
+ struct sk_buff *skb = first;
int rc = NETDEV_TX_OK;
- unsigned int skb_len;
- if (likely(!skb->next)) {
- netdev_features_t features;
+ while (skb) {
+ struct sk_buff *next = skb->next;
- /*
- * If device doesn't need skb->dst, release it right now while
- * its hot in this cpu cache
- */
- if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
- skb_dst_drop(skb);
+ skb->next = NULL;
+ rc = xmit_one(skb, dev, txq, next != NULL);
+ if (unlikely(!dev_xmit_complete(rc))) {
+ skb->next = next;
+ goto out;
+ }
- features = netif_skb_features(skb);
+ skb = next;
+ if (netif_xmit_stopped(txq) && skb) {
+ rc = NETDEV_TX_BUSY;
+ break;
+ }
+ }
- if (vlan_tx_tag_present(skb) &&
- !vlan_hw_offload_capable(features, skb->vlan_proto)) {
- skb = __vlan_put_tag(skb, skb->vlan_proto,
- vlan_tx_tag_get(skb));
- if (unlikely(!skb))
- goto out;
+out:
+ *ret = rc;
+ return skb;
+}
+struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features)
+{
+ if (vlan_tx_tag_present(skb) &&
+ !vlan_hw_offload_capable(features, skb->vlan_proto)) {
+ skb = __vlan_put_tag(skb, skb->vlan_proto,
+ vlan_tx_tag_get(skb));
+ if (skb)
skb->vlan_tci = 0;
- }
+ }
+ return skb;
+}
- /* If encapsulation offload request, verify we are testing
- * hardware encapsulation features instead of standard
- * features for the netdev
- */
- if (skb->encapsulation)
- features &= dev->hw_enc_features;
+struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+{
+ netdev_features_t features;
- if (netif_needs_gso(skb, features)) {
- if (unlikely(dev_gso_segment(skb, features)))
- goto out_kfree_skb;
- if (skb->next)
- goto gso;
- } else {
- if (skb_needs_linearize(skb, features) &&
- __skb_linearize(skb))
- goto out_kfree_skb;
+ if (skb->next)
+ return skb;
- /* If packet is not checksummed and device does not
- * support checksumming for this protocol, complete
- * checksumming here.
- */
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (skb->encapsulation)
- skb_set_inner_transport_header(skb,
- skb_checksum_start_offset(skb));
- else
- skb_set_transport_header(skb,
- skb_checksum_start_offset(skb));
- if (!(features & NETIF_F_ALL_CSUM) &&
- skb_checksum_help(skb))
- goto out_kfree_skb;
- }
- }
+ /* If device doesn't need skb->dst, release it right now while
+ * its hot in this cpu cache
+ */
+ if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+ skb_dst_drop(skb);
- if (!list_empty(&ptype_all))
- dev_queue_xmit_nit(skb, dev);
+ features = netif_skb_features(skb);
+ skb = validate_xmit_vlan(skb, features);
+ if (unlikely(!skb))
+ goto out_null;
- skb_len = skb->len;
- trace_net_dev_start_xmit(skb, dev);
- rc = ops->ndo_start_xmit(skb, dev);
- trace_net_dev_xmit(skb, rc, dev, skb_len);
- if (rc == NETDEV_TX_OK)
- txq_trans_update(txq);
- return rc;
- }
+ /* If encapsulation offload request, verify we are testing
+ * hardware encapsulation features instead of standard
+ * features for the netdev
+ */
+ if (skb->encapsulation)
+ features &= dev->hw_enc_features;
-gso:
- do {
- struct sk_buff *nskb = skb->next;
+ if (netif_needs_gso(skb, features)) {
+ struct sk_buff *segs;
- skb->next = nskb->next;
- nskb->next = NULL;
+ segs = skb_gso_segment(skb, features);
+ kfree_skb(skb);
+ if (IS_ERR(segs))
+ segs = NULL;
+ skb = segs;
+ } else {
+ if (skb_needs_linearize(skb, features) &&
+ __skb_linearize(skb))
+ goto out_kfree_skb;
- if (!list_empty(&ptype_all))
- dev_queue_xmit_nit(nskb, dev);
-
- skb_len = nskb->len;
- trace_net_dev_start_xmit(nskb, dev);
- rc = ops->ndo_start_xmit(nskb, dev);
- trace_net_dev_xmit(nskb, rc, dev, skb_len);
- if (unlikely(rc != NETDEV_TX_OK)) {
- if (rc & ~NETDEV_TX_MASK)
- goto out_kfree_gso_skb;
- nskb->next = skb->next;
- skb->next = nskb;
- return rc;
+ /* If packet is not checksummed and device does not
+ * support checksumming for this protocol, complete
+ * checksumming here.
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ if (skb->encapsulation)
+ skb_set_inner_transport_header(skb,
+ skb_checksum_start_offset(skb));
+ else
+ skb_set_transport_header(skb,
+ skb_checksum_start_offset(skb));
+ if (!(features & NETIF_F_ALL_CSUM) &&
+ skb_checksum_help(skb))
+ goto out_kfree_skb;
}
- txq_trans_update(txq);
- if (unlikely(netif_xmit_stopped(txq) && skb->next))
- return NETDEV_TX_BUSY;
- } while (skb->next);
-
-out_kfree_gso_skb:
- if (likely(skb->next == NULL)) {
- skb->destructor = DEV_GSO_CB(skb)->destructor;
- consume_skb(skb);
- return rc;
}
+
+ return skb;
+
out_kfree_skb:
kfree_skb(skb);
-out:
- return rc;
+out_null:
+ return NULL;
}
-EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
static void qdisc_pkt_len_init(struct sk_buff *skb)
{
@@ -2785,7 +2745,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_bstats_update(q, skb);
- if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+ skb = validate_xmit_skb(skb, dev);
+ if (skb && sch_direct_xmit(skb, q, dev, txq, root_lock)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
contended = false;
@@ -2925,11 +2886,15 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
goto recursion_alert;
+ skb = validate_xmit_skb(skb, dev);
+ if (!skb)
+ goto drop;
+
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_xmit_stopped(txq)) {
__this_cpu_inc(xmit_recursion);
- rc = dev_hard_start_xmit(skb, dev, txq);
+ skb = dev_hard_start_xmit(skb, dev, txq, &rc);
__this_cpu_dec(xmit_recursion);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
@@ -2950,10 +2915,11 @@ recursion_alert:
}
rc = -ENETDOWN;
+drop:
rcu_read_unlock_bh();
atomic_long_inc(&dev->tx_dropped);
- kfree_skb(skb);
+ kfree_skb_list(skb);
return rc;
out:
rcu_read_unlock_bh();
@@ -3130,8 +3096,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
}
if (map) {
- tcpu = map->cpus[((u64) hash * map->len) >> 32];
-
+ tcpu = map->cpus[reciprocal_scale(hash, map->len)];
if (cpu_online(tcpu)) {
cpu = tcpu;
goto done;
@@ -3965,11 +3930,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
- if (skb_is_gso(skb) || skb_has_frag_list(skb))
+ if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
goto normal;
gro_list_prepare(napi, skb);
- NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
@@ -3983,6 +3947,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->udp_mark = 0;
+ /* Setup for GRO checksum validation */
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ NAPI_GRO_CB(skb)->csum = skb->csum;
+ NAPI_GRO_CB(skb)->csum_valid = 1;
+ NAPI_GRO_CB(skb)->csum_cnt = 0;
+ break;
+ case CHECKSUM_UNNECESSARY:
+ NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
+ NAPI_GRO_CB(skb)->csum_valid = 0;
+ break;
+ default:
+ NAPI_GRO_CB(skb)->csum_cnt = 0;
+ NAPI_GRO_CB(skb)->csum_valid = 0;
+ }
+
pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
break;
}
@@ -4212,6 +4192,31 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
}
EXPORT_SYMBOL(napi_gro_frags);
+/* Compute the checksum from gro_offset and return the folded value
+ * after adding in any pseudo checksum.
+ */
+__sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
+{
+ __wsum wsum;
+ __sum16 sum;
+
+ wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
+
+ /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
+ sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
+ if (likely(!sum)) {
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
+ netdev_rx_csum_fault(skb->dev);
+ }
+
+ NAPI_GRO_CB(skb)->csum = wsum;
+ NAPI_GRO_CB(skb)->csum_valid = 1;
+
+ return sum;
+}
+EXPORT_SYMBOL(__skb_gro_checksum_complete);
+
/*
* net_rps_action_and_irq_enable sends any pending IPI's for rps.
* Note: called with local irq disabled, but exits with local irq enabled.
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index cf999e09bcd2..72e899a3efda 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -365,11 +365,8 @@ void dev_load(struct net *net, const char *name)
no_module = !dev;
if (no_module && capable(CAP_NET_ADMIN))
no_module = request_module("netdev-%s", name);
- if (no_module && capable(CAP_SYS_MODULE)) {
- if (!request_module("%s", name))
- pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
- name);
- }
+ if (no_module && capable(CAP_SYS_MODULE))
+ request_module("%s", name);
}
EXPORT_SYMBOL(dev_load);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 17cb912793fa..27e61b886520 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1621,6 +1621,80 @@ static int ethtool_get_module_eeprom(struct net_device *dev,
modinfo.eeprom_len);
}
+static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
+{
+ switch (tuna->id) {
+ case ETHTOOL_RX_COPYBREAK:
+ if (tuna->len != sizeof(u32) ||
+ tuna->type_id != ETHTOOL_TUNABLE_U32)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr)
+{
+ int ret;
+ struct ethtool_tunable tuna;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ void *data;
+
+ if (!ops->get_tunable)
+ return -EOPNOTSUPP;
+ if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+ return -EFAULT;
+ ret = ethtool_tunable_valid(&tuna);
+ if (ret)
+ return ret;
+ data = kmalloc(tuna.len, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+ ret = ops->get_tunable(dev, &tuna, data);
+ if (ret)
+ goto out;
+ useraddr += sizeof(tuna);
+ ret = -EFAULT;
+ if (copy_to_user(useraddr, data, tuna.len))
+ goto out;
+ ret = 0;
+
+out:
+ kfree(data);
+ return ret;
+}
+
+static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr)
+{
+ int ret;
+ struct ethtool_tunable tuna;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ void *data;
+
+ if (!ops->set_tunable)
+ return -EOPNOTSUPP;
+ if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+ return -EFAULT;
+ ret = ethtool_tunable_valid(&tuna);
+ if (ret)
+ return ret;
+ data = kmalloc(tuna.len, GFP_USER);
+ if (!data)
+ return -ENOMEM;
+ useraddr += sizeof(tuna);
+ ret = -EFAULT;
+ if (copy_from_user(data, useraddr, tuna.len))
+ goto out;
+ ret = ops->set_tunable(dev, &tuna, data);
+
+out:
+ kfree(data);
+ return ret;
+}
+
/* The main entry point in this file. Called from net/core/dev_ioctl.c */
int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -1670,6 +1744,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GCHANNELS:
case ETHTOOL_GET_TS_INFO:
case ETHTOOL_GEEE:
+ case ETHTOOL_GTUNABLE:
break;
default:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -1857,6 +1932,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GMODULEEEPROM:
rc = ethtool_get_module_eeprom(dev, useraddr);
break;
+ case ETHTOOL_GTUNABLE:
+ rc = ethtool_get_tunable(dev, useraddr);
+ break;
+ case ETHTOOL_STUNABLE:
+ rc = ethtool_set_tunable(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index d814b8a89d0f..fa5b7d0f77ac 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -113,7 +113,7 @@ static unsigned int pkt_type_offset(void)
static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
- return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
+ return skb_get_poff((struct sk_buff *)(unsigned long) ctx);
}
static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
@@ -933,7 +933,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
/* Expand fp for appending the new filter representation. */
old_fp = fp;
- fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL);
+ fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
if (!fp) {
/* The old_fp is still around in case we couldn't
* allocate new memory, so uncharge on that one.
@@ -1013,7 +1013,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
if (fprog->filter == NULL)
return -EINVAL;
- fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL);
+ fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
if (!fp)
return -ENOMEM;
@@ -1069,7 +1069,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (fprog->filter == NULL)
return -EINVAL;
- prog = kmalloc(bpf_fsize, GFP_KERNEL);
+ prog = bpf_prog_alloc(bpf_fsize, 0);
if (!prog)
return -ENOMEM;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 5f362c1d0332..8560dea58803 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -13,6 +13,7 @@
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <net/flow_keys.h>
+#include <scsi/fc/fc_fcoe.h>
/* copy saddr & daddr, possibly using 64bit load/store
* Equivalent to : flow->src = iph->saddr;
@@ -26,36 +27,61 @@ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *i
}
/**
- * skb_flow_get_ports - extract the upper layer ports and return them
- * @skb: buffer to extract the ports from
+ * __skb_flow_get_ports - extract the upper layer ports and return them
+ * @skb: sk_buff to extract the ports from
* @thoff: transport header offset
* @ip_proto: protocol for which to get port offset
+ * @data: raw buffer pointer to the packet, if NULL use skb->data
+ * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
*
* The function will try to retrieve the ports at offset thoff + poff where poff
* is the protocol port offset returned from proto_ports_offset
*/
-__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto)
+__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
+ void *data, int hlen)
{
int poff = proto_ports_offset(ip_proto);
+ if (!data) {
+ data = skb->data;
+ hlen = skb_headlen(skb);
+ }
+
if (poff >= 0) {
__be32 *ports, _ports;
- ports = skb_header_pointer(skb, thoff + poff,
- sizeof(_ports), &_ports);
+ ports = __skb_header_pointer(skb, thoff + poff,
+ sizeof(_ports), data, hlen, &_ports);
if (ports)
return *ports;
}
return 0;
}
-EXPORT_SYMBOL(skb_flow_get_ports);
+EXPORT_SYMBOL(__skb_flow_get_ports);
-bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
+/**
+ * __skb_flow_dissect - extract the flow_keys struct and return it
+ * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
+ * @data: raw buffer pointer to the packet, if NULL use skb->data
+ * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
+ * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
+ * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
+ *
+ * The function will try to retrieve the struct flow_keys from either the skbuff
+ * or a raw buffer specified by the rest parameters
+ */
+bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
+ void *data, __be16 proto, int nhoff, int hlen)
{
- int nhoff = skb_network_offset(skb);
u8 ip_proto;
- __be16 proto = skb->protocol;
+
+ if (!data) {
+ data = skb->data;
+ proto = skb->protocol;
+ nhoff = skb_network_offset(skb);
+ hlen = skb_headlen(skb);
+ }
memset(flow, 0, sizeof(*flow));
@@ -65,7 +91,7 @@ again:
const struct iphdr *iph;
struct iphdr _iph;
ip:
- iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+ iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
if (!iph || iph->ihl < 5)
return false;
nhoff += iph->ihl * 4;
@@ -83,7 +109,7 @@ ip:
__be32 flow_label;
ipv6:
- iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+ iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
if (!iph)
return false;
@@ -92,6 +118,13 @@ ipv6:
flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
nhoff += sizeof(struct ipv6hdr);
+ /* skip the flow label processing if skb is NULL. The
+ * assumption here is that if there is no skb we are not
+ * looking for flow info as much as we are length.
+ */
+ if (!skb)
+ break;
+
flow_label = ip6_flowlabel(iph);
if (flow_label) {
/* Awesome, IPv6 packet has a flow label so we can
@@ -113,7 +146,7 @@ ipv6:
const struct vlan_hdr *vlan;
struct vlan_hdr _vlan;
- vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan);
+ vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
if (!vlan)
return false;
@@ -126,7 +159,7 @@ ipv6:
struct pppoe_hdr hdr;
__be16 proto;
} *hdr, _hdr;
- hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr)
return false;
proto = hdr->proto;
@@ -140,6 +173,9 @@ ipv6:
return false;
}
}
+ case htons(ETH_P_FCOE):
+ flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
+ /* fall through */
default:
return false;
}
@@ -151,7 +187,7 @@ ipv6:
__be16 proto;
} *hdr, _hdr;
- hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr)
return false;
/*
@@ -171,8 +207,9 @@ ipv6:
const struct ethhdr *eth;
struct ethhdr _eth;
- eth = skb_header_pointer(skb, nhoff,
- sizeof(_eth), &_eth);
+ eth = __skb_header_pointer(skb, nhoff,
+ sizeof(_eth),
+ data, hlen, &_eth);
if (!eth)
return false;
proto = eth->h_proto;
@@ -194,12 +231,12 @@ ipv6:
flow->n_proto = proto;
flow->ip_proto = ip_proto;
- flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto);
+ flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
flow->thoff = (u16) nhoff;
return true;
}
-EXPORT_SYMBOL(skb_flow_dissect);
+EXPORT_SYMBOL(__skb_flow_dissect);
static u32 hashrnd __read_mostly;
static __always_inline void __flow_hash_secret_init(void)
@@ -286,30 +323,22 @@ u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
qcount = dev->tc_to_txq[tc].count;
}
- return (u16) (((u64)skb_get_hash(skb) * qcount) >> 32) + qoffset;
+ return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
-/* __skb_get_poff() returns the offset to the payload as far as it could
- * be dissected. The main user is currently BPF, so that we can dynamically
- * truncate packets without needing to push actual payload to the user
- * space and can analyze headers only, instead.
- */
-u32 __skb_get_poff(const struct sk_buff *skb)
+u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+ const struct flow_keys *keys, int hlen)
{
- struct flow_keys keys;
- u32 poff = 0;
-
- if (!skb_flow_dissect(skb, &keys))
- return 0;
+ u32 poff = keys->thoff;
- poff += keys.thoff;
- switch (keys.ip_proto) {
+ switch (keys->ip_proto) {
case IPPROTO_TCP: {
const struct tcphdr *tcph;
struct tcphdr _tcph;
- tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph);
+ tcph = __skb_header_pointer(skb, poff, sizeof(_tcph),
+ data, hlen, &_tcph);
if (!tcph)
return poff;
@@ -343,6 +372,21 @@ u32 __skb_get_poff(const struct sk_buff *skb)
return poff;
}
+/* skb_get_poff() returns the offset to the payload as far as it could
+ * be dissected. The main user is currently BPF, so that we can dynamically
+ * truncate packets without needing to push actual payload to the user
+ * space and can analyze headers only, instead.
+ */
+u32 skb_get_poff(const struct sk_buff *skb)
+{
+ struct flow_keys keys;
+
+ if (!skb_flow_dissect(skb, &keys))
+ return 0;
+
+ return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
+}
+
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
@@ -359,9 +403,8 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
if (map->len == 1)
queue_index = map->queues[0];
else
- queue_index = map->queues[
- ((u64)skb_get_hash(skb) * map->len) >> 32];
-
+ queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
+ map->len)];
if (unlikely(queue_index >= dev->real_num_tx_queues))
queue_index = -1;
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 907fb5e36c02..e6645b4f330a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -72,7 +72,6 @@ module_param(carrier_timeout, uint, 0644);
static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
- const struct net_device_ops *ops = dev->netdev_ops;
int status = NETDEV_TX_OK;
netdev_features_t features;
@@ -92,9 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb->vlan_tci = 0;
}
- status = ops->ndo_start_xmit(skb, dev);
- if (status == NETDEV_TX_OK)
- txq_trans_update(txq);
+ status = netdev_start_xmit(skb, dev, txq, false);
out:
return status;
@@ -116,7 +113,7 @@ static void queue_process(struct work_struct *work)
continue;
}
- txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+ txq = skb_get_tx_queue(dev, skb);
local_irq_save(flags);
HARD_TX_LOCK(dev, txq, smp_processor_id());
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8b849ddfef2e..21cb4839bc97 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -202,6 +202,7 @@
#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
#define F_NODE (1<<15) /* Node memory alloc*/
#define F_UDPCSUM (1<<16) /* Include UDP checksum */
+#define F_NO_TIMESTAMP (1<<17) /* Don't timestamp packets (default TS) */
/* Thread control flag bits */
#define T_STOP (1<<0) /* Stop run */
@@ -638,6 +639,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->flags & F_UDPCSUM)
seq_puts(seq, "UDPCSUM ");
+ if (pkt_dev->flags & F_NO_TIMESTAMP)
+ seq_puts(seq, "NO_TIMESTAMP ");
+
if (pkt_dev->flags & F_MPLS_RND)
seq_puts(seq, "MPLS_RND ");
@@ -1243,6 +1247,9 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, "!UDPCSUM") == 0)
pkt_dev->flags &= ~F_UDPCSUM;
+ else if (strcmp(f, "NO_TIMESTAMP") == 0)
+ pkt_dev->flags |= F_NO_TIMESTAMP;
+
else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
@@ -1251,6 +1258,7 @@ static ssize_t pktgen_if_write(struct file *file,
"MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, "
"MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, "
"QUEUE_MAP_RND, QUEUE_MAP_CPU, UDPCSUM, "
+ "NO_TIMESTAMP, "
#ifdef CONFIG_XFRM
"IPSEC, "
#endif
@@ -2685,9 +2693,14 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
pgh->pgh_magic = htonl(PKTGEN_MAGIC);
pgh->seq_num = htonl(pkt_dev->seq_num);
- do_gettimeofday(&timestamp);
- pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
+ if (pkt_dev->flags & F_NO_TIMESTAMP) {
+ pgh->tv_sec = 0;
+ pgh->tv_usec = 0;
+ } else {
+ do_gettimeofday(&timestamp);
+ pgh->tv_sec = htonl(timestamp.tv_sec);
+ pgh->tv_usec = htonl(timestamp.tv_usec);
+ }
}
static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
@@ -3285,10 +3298,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
struct net_device *odev = pkt_dev->odev;
- netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *)
- = odev->netdev_ops->ndo_start_xmit;
struct netdev_queue *txq;
- u16 queue_map;
int ret;
/* If device is offline, then don't send */
@@ -3326,8 +3336,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
if (pkt_dev->delay && pkt_dev->last_ok)
spin(pkt_dev, pkt_dev->next_tx);
- queue_map = skb_get_queue_mapping(pkt_dev->skb);
- txq = netdev_get_tx_queue(odev, queue_map);
+ txq = skb_get_tx_queue(odev, pkt_dev->skb);
local_bh_disable();
@@ -3339,11 +3348,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
goto unlock;
}
atomic_inc(&(pkt_dev->skb->users));
- ret = (*xmit)(pkt_dev->skb, odev);
+ ret = netdev_start_xmit(pkt_dev->skb, odev, txq, false);
switch (ret) {
case NETDEV_TX_OK:
- txq_trans_update(txq);
pkt_dev->last_ok = 1;
pkt_dev->sofar++;
pkt_dev->seq_num++;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f0493e3b7471..a6882686ca3a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1481,9 +1481,12 @@ static int do_set_master(struct net_device *dev, int ifindex)
return 0;
}
+#define DO_SETLINK_MODIFIED 0x01
+/* notify flag means notify + modified. */
+#define DO_SETLINK_NOTIFY 0x03
static int do_setlink(const struct sk_buff *skb,
struct net_device *dev, struct ifinfomsg *ifm,
- struct nlattr **tb, char *ifname, int modified)
+ struct nlattr **tb, char *ifname, int status)
{
const struct net_device_ops *ops = dev->netdev_ops;
int err;
@@ -1502,7 +1505,7 @@ static int do_setlink(const struct sk_buff *skb,
put_net(net);
if (err)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_MAP]) {
@@ -1531,7 +1534,7 @@ static int do_setlink(const struct sk_buff *skb,
if (err < 0)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_NOTIFY;
}
if (tb[IFLA_ADDRESS]) {
@@ -1551,19 +1554,19 @@ static int do_setlink(const struct sk_buff *skb,
kfree(sa);
if (err)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_MTU]) {
err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
if (err < 0)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_GROUP]) {
dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
- modified = 1;
+ status |= DO_SETLINK_NOTIFY;
}
/*
@@ -1575,7 +1578,7 @@ static int do_setlink(const struct sk_buff *skb,
err = dev_change_name(dev, ifname);
if (err < 0)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_IFALIAS]) {
@@ -1583,7 +1586,7 @@ static int do_setlink(const struct sk_buff *skb,
nla_len(tb[IFLA_IFALIAS]));
if (err < 0)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_NOTIFY;
}
if (tb[IFLA_BROADCAST]) {
@@ -1601,25 +1604,35 @@ static int do_setlink(const struct sk_buff *skb,
err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
if (err)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_MODIFIED;
}
if (tb[IFLA_CARRIER]) {
err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER]));
if (err)
goto errout;
- modified = 1;
+ status |= DO_SETLINK_MODIFIED;
}
- if (tb[IFLA_TXQLEN])
- dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
+ if (tb[IFLA_TXQLEN]) {
+ unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]);
+
+ if (dev->tx_queue_len ^ value)
+ status |= DO_SETLINK_NOTIFY;
+
+ dev->tx_queue_len = value;
+ }
if (tb[IFLA_OPERSTATE])
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
if (tb[IFLA_LINKMODE]) {
+ unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]);
+
write_lock_bh(&dev_base_lock);
- dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+ if (dev->link_mode ^ value)
+ status |= DO_