summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-12-27 13:04:52 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-27 13:04:52 -0800
commite0c38a4d1f196a4b17d2eba36afff8f656a4f1de (patch)
treeb26a69fabef0160adb127416a9744217700feeb7 /net/core
parent7f9f852c75e7d776b078813586c76a2bc7dca993 (diff)
parent90cadbbf341dd5b2df991c33a6bd6341f3a53788 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) New ipset extensions for matching on destination MAC addresses, from Stefano Brivio. 2) Add ipv4 ttl and tos, plus ipv6 flow label and hop limit offloads to nfp driver. From Stefano Brivio. 3) Implement GRO for plain UDP sockets, from Paolo Abeni. 4) Lots of work from Michał Mirosław to eliminate the VLAN_TAG_PRESENT bit so that we could support the entire vlan_tci value. 5) Rework the IPSEC policy lookups to better optimize more usecases, from Florian Westphal. 6) Infrastructure changes eliminating direct manipulation of SKB lists wherever possible, and to always use the appropriate SKB list helpers. This work is still ongoing... 7) Lots of PHY driver and state machine improvements and simplifications, from Heiner Kallweit. 8) Various TSO deferral refinements, from Eric Dumazet. 9) Add ntuple filter support to aquantia driver, from Dmitry Bogdanov. 10) Batch dropping of XDP packets in tuntap, from Jason Wang. 11) Lots of cleanups and improvements to the r8169 driver from Heiner Kallweit, including support for ->xmit_more. This driver has been getting some much needed love since he started working on it. 12) Lots of new forwarding selftests from Petr Machata. 13) Enable VXLAN learning in mlxsw driver, from Ido Schimmel. 14) Packed ring support for virtio, from Tiwei Bie. 15) Add new Aquantia AQtion USB driver, from Dmitry Bezrukov. 16) Add XDP support to dpaa2-eth driver, from Ioana Ciocoi Radulescu. 17) Implement coalescing on TCP backlog queue, from Eric Dumazet. 18) Implement carrier change in tun driver, from Nicolas Dichtel. 19) Support msg_zerocopy in UDP, from Willem de Bruijn. 20) Significantly improve garbage collection of neighbor objects when the table has many PERMANENT entries, from David Ahern. 21) Remove egdev usage from nfp and mlx5, and remove the facility completely from the tree as it no longer has any users. From Oz Shlomo and others. 22) Add a NETDEV_PRE_CHANGEADDR so that drivers can veto the change and therefore abort the operation before the commit phase (which is the NETDEV_CHANGEADDR event). From Petr Machata. 23) Add indirect call wrappers to avoid retpoline overhead, and use them in the GRO code paths. From Paolo Abeni. 24) Add support for netlink FDB get operations, from Roopa Prabhu. 25) Support bloom filter in mlxsw driver, from Nir Dotan. 26) Add SKB extension infrastructure. This consolidates the handling of the auxiliary SKB data used by IPSEC and bridge netfilter, and is designed to support the needs to MPTCP which could be integrated in the future. 27) Lots of XDP TX optimizations in mlx5 from Tariq Toukan. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1845 commits) net: dccp: fix kernel crash on module load drivers/net: appletalk/cops: remove redundant if statement and mask bnx2x: Fix NULL pointer dereference in bnx2x_del_all_vlans() on some hw net/net_namespace: Check the return value of register_pernet_subsys() net/netlink_compat: Fix a missing check of nla_parse_nested ieee802154: lowpan_header_create check must check daddr net/mlx4_core: drop useless LIST_HEAD mlxsw: spectrum: drop useless LIST_HEAD net/mlx5e: drop useless LIST_HEAD iptunnel: Set tun_flags in the iptunnel_metadata_reply from src net/mlx5e: fix semicolon.cocci warnings staging: octeon: fix build failure with XFRM enabled net: Revert recent Spectre-v1 patches. can: af_can: Fix Spectre v1 vulnerability packet: validate address length if non-zero nfc: af_nfc: Fix Spectre v1 vulnerability phonet: af_phonet: Fix Spectre v1 vulnerability net: core: Fix Spectre v1 vulnerability net: minor cleanup in skb_ext_add() net: drop the unused helper skb_ext_get() ...
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c45
-rw-r--r--net/core/dev.c152
-rw-r--r--net/core/dev_addr_lists.c100
-rw-r--r--net/core/dev_ioctl.c4
-rw-r--r--net/core/devlink.c5
-rw-r--r--net/core/filter.c455
-rw-r--r--net/core/flow_dissector.c3
-rw-r--r--net/core/neighbour.c448
-rw-r--r--net/core/net-sysfs.c2
-rw-r--r--net/core/net_namespace.c162
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/rtnetlink.c517
-rw-r--r--net/core/skbuff.c313
-rw-r--r--net/core/skmsg.c23
-rw-r--r--net/core/sock.c14
-rw-r--r--net/core/sock_reuseport.c1
-rw-r--r--net/core/stream.c2
17 files changed, 1790 insertions, 458 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 57f3a6fcfc1e..4bf62b1afa3b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -728,49 +728,6 @@ fault:
return -EFAULT;
}
-__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
-{
- __sum16 sum;
-
- sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
- if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
- !skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
- }
- if (!skb_shared(skb))
- skb->csum_valid = !sum;
- return sum;
-}
-EXPORT_SYMBOL(__skb_checksum_complete_head);
-
-__sum16 __skb_checksum_complete(struct sk_buff *skb)
-{
- __wsum csum;
- __sum16 sum;
-
- csum = skb_checksum(skb, 0, skb->len, 0);
-
- /* skb->csum holds pseudo checksum */
- sum = csum_fold(csum_add(skb->csum, csum));
- if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
- !skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
- }
-
- if (!skb_shared(skb)) {
- /* Save full packet checksum */
- skb->csum = csum;
- skb->ip_summed = CHECKSUM_COMPLETE;
- skb->csum_complete_sw = 1;
- skb->csum_valid = !sum;
- }
-
- return sum;
-}
-EXPORT_SYMBOL(__skb_checksum_complete);
-
/**
* skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
* @skb: skbuff
@@ -810,7 +767,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
- netdev_rx_csum_fault(NULL);
+ netdev_rx_csum_fault(NULL, skb);
}
return 0;
fault:
diff --git a/net/core/dev.c b/net/core/dev.c
index 722d50dbf8a4..1b5a4410be0e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
#include <linux/net_namespace.h>
+#include <linux/indirect_call_wrapper.h>
#include "net-sysfs.h"
@@ -162,6 +163,9 @@ static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
struct netdev_notifier_info *info);
+static int call_netdevice_notifiers_extack(unsigned long val,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack);
static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
@@ -1361,7 +1365,7 @@ void netdev_notify_peers(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_notify_peers);
-static int __dev_open(struct net_device *dev)
+static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
int ret;
@@ -1377,7 +1381,7 @@ static int __dev_open(struct net_device *dev)
*/
netpoll_poll_disable(dev);
- ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
+ ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack);
ret = notifier_to_errno(ret);
if (ret)
return ret;
@@ -1406,7 +1410,8 @@ static int __dev_open(struct net_device *dev)
/**
* dev_open - prepare an interface for use.
- * @dev: device to open
+ * @dev: device to open
+ * @extack: netlink extended ack
*
* Takes a device from down to up state. The device's private open
* function is invoked and then the multicast lists are loaded. Finally
@@ -1416,14 +1421,14 @@ static int __dev_open(struct net_device *dev)
* Calling this function on an active interface is a nop. On a failure
* a negative errno code is returned.
*/
-int dev_open(struct net_device *dev)
+int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
int ret;
if (dev->flags & IFF_UP)
return 0;
- ret = __dev_open(dev);
+ ret = __dev_open(dev, extack);
if (ret < 0)
return ret;
@@ -1585,6 +1590,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
+ N(PRE_CHANGEADDR)
}
#undef N
return "UNKNOWN_NETDEV_EVENT";
@@ -1733,6 +1739,18 @@ static int call_netdevice_notifiers_info(unsigned long val,
return raw_notifier_call_chain(&netdev_chain, val, info);
}
+static int call_netdevice_notifiers_extack(unsigned long val,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ .extack = extack,
+ };
+
+ return call_netdevice_notifiers_info(val, &info);
+}
+
/**
* call_netdevice_notifiers - call all network notifier blocks
* @val: value passed unmodified to notifier function
@@ -1744,11 +1762,7 @@ static int call_netdevice_notifiers_info(unsigned long val,
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- struct netdev_notifier_info info = {
- .dev = dev,
- };
-
- return call_netdevice_notifiers_info(val, &info);
+ return call_netdevice_notifiers_extack(val, dev, NULL);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -3096,10 +3110,17 @@ EXPORT_SYMBOL(__skb_gso_segment);
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
-void netdev_rx_csum_fault(struct net_device *dev)
+void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
{
if (net_ratelimit()) {
pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
+ if (dev)
+ pr_err("dev features: %pNF\n", &dev->features);
+ pr_err("skb len=%u data_len=%u pkt_type=%u gso_size=%u gso_type=%u nr_frags=%u ip_summed=%u csum=%x csum_complete_sw=%d csum_valid=%d csum_level=%u\n",
+ skb->len, skb->data_len, skb->pkt_type,
+ skb_shinfo(skb)->gso_size, skb_shinfo(skb)->gso_type,
+ skb_shinfo(skb)->nr_frags, skb->ip_summed, skb->csum,
+ skb->csum_complete_sw, skb->csum_valid, skb->csum_level);
dump_stack();
}
}
@@ -4525,9 +4546,14 @@ static int netif_rx_internal(struct sk_buff *skb)
int netif_rx(struct sk_buff *skb)
{
+ int ret;
+
trace_netif_rx_entry(skb);
- return netif_rx_internal(skb);
+ ret = netif_rx_internal(skb);
+ trace_netif_rx_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(netif_rx);
@@ -4542,6 +4568,7 @@ int netif_rx_ni(struct sk_buff *skb)
if (local_softirq_pending())
do_softirq();
preempt_enable();
+ trace_netif_rx_ni_exit(err);
return err;
}
@@ -4894,7 +4921,7 @@ skip_classify:
* and set skb->priority like in vlan_do_receive()
* For the time being, just ignore Priority Code Point
*/
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
}
type = skb->protocol;
@@ -5227,9 +5254,14 @@ static void netif_receive_skb_list_internal(struct list_head *head)
*/
int netif_receive_skb(struct sk_buff *skb)
{
+ int ret;
+
trace_netif_receive_skb_entry(skb);
- return netif_receive_skb_internal(skb);
+ ret = netif_receive_skb_internal(skb);
+ trace_netif_receive_skb_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(netif_receive_skb);
@@ -5249,9 +5281,12 @@ void netif_receive_skb_list(struct list_head *head)
if (list_empty(head))
return;
- list_for_each_entry(skb, head, list)
- trace_netif_receive_skb_list_entry(skb);
+ if (trace_netif_receive_skb_list_entry_enabled()) {
+ list_for_each_entry(skb, head, list)
+ trace_netif_receive_skb_list_entry(skb);
+ }
netif_receive_skb_list_internal(head);
+ trace_netif_receive_skb_list_exit(0);
}
EXPORT_SYMBOL(netif_receive_skb_list);
@@ -5304,6 +5339,8 @@ static void flush_all_backlogs(void)
put_online_cpus();
}
+INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
static int napi_gro_complete(struct sk_buff *skb)
{
struct packet_offload *ptype;
@@ -5323,7 +5360,9 @@ static int napi_gro_complete(struct sk_buff *skb)
if (ptype->type != type || !ptype->callbacks.gro_complete)
continue;
- err = ptype->callbacks.gro_complete(skb, 0);
+ err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+ ipv6_gro_complete, inet_gro_complete,
+ skb, 0);
break;
}
rcu_read_unlock();
@@ -5362,11 +5401,13 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
*/
void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{
- u32 i;
+ unsigned long bitmask = napi->gro_bitmask;
+ unsigned int i, base = ~0U;
- for (i = 0; i < GRO_HASH_BUCKETS; i++) {
- if (test_bit(i, &napi->gro_bitmask))
- __napi_gro_flush_chain(napi, i, flush_old);
+ while ((i = ffs(bitmask)) != 0) {
+ bitmask >>= i;
+ base += i;
+ __napi_gro_flush_chain(napi, base, flush_old);
}
}
EXPORT_SYMBOL(napi_gro_flush);
@@ -5391,7 +5432,9 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
}
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
- diffs |= p->vlan_tci ^ skb->vlan_tci;
+ diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
+ if (skb_vlan_tag_present(p))
+ diffs |= p->vlan_tci ^ skb->vlan_tci;
diffs |= skb_metadata_dst_cmp(p, skb);
diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
@@ -5466,6 +5509,10 @@ static void gro_flush_oldest(struct list_head *head)
napi_gro_complete(oldest);
}
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
+ struct sk_buff *));
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
@@ -5515,7 +5562,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->csum_valid = 0;
}
- pp = ptype->callbacks.gro_receive(gro_head, skb);
+ pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
+ ipv6_gro_receive, inet_gro_receive,
+ gro_head, skb);
break;
}
rcu_read_unlock();
@@ -5639,12 +5688,17 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
+ gro_result_t ret;
+
skb_mark_napi_id(skb, napi);
trace_napi_gro_receive_entry(skb);
skb_gro_reset_offset(skb);
- return napi_skb_finish(dev_gro_receive(napi, skb), skb);
+ ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
+ trace_napi_gro_receive_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(napi_gro_receive);
@@ -5657,7 +5711,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
__skb_pull(skb, skb_headlen(skb));
/* restore the reserve we had after netdev_alloc_skb_ip_align() */
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
skb->dev = napi->dev;
skb->skb_iif = 0;
@@ -5762,6 +5816,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
gro_result_t napi_gro_frags(struct napi_struct *napi)
{
+ gro_result_t ret;
struct sk_buff *skb = napi_frags_skb(napi);
if (!skb)
@@ -5769,7 +5824,10 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
trace_napi_gro_frags_entry(skb);
- return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
+ ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
+ trace_napi_gro_frags_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(napi_gro_frags);
@@ -5785,10 +5843,11 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
/* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
+ /* See comments in __skb_checksum_complete(). */
if (likely(!sum)) {
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
+ netdev_rx_csum_fault(skb->dev, skb);
}
NAPI_GRO_CB(skb)->csum = wsum;
@@ -7467,7 +7526,8 @@ unsigned int dev_get_flags(const struct net_device *dev)
}
EXPORT_SYMBOL(dev_get_flags);
-int __dev_change_flags(struct net_device *dev, unsigned int flags)
+int __dev_change_flags(struct net_device *dev, unsigned int flags,
+ struct netlink_ext_ack *extack)
{
unsigned int old_flags = dev->flags;
int ret;
@@ -7504,7 +7564,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
if (old_flags & IFF_UP)
__dev_close(dev);
else
- ret = __dev_open(dev);
+ ret = __dev_open(dev, extack);
}
if ((flags ^ dev->gflags) & IFF_PROMISC) {
@@ -7564,16 +7624,18 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
* dev_change_flags - change device settings
* @dev: device
* @flags: device state flags
+ * @extack: netlink extended ack
*
* Change settings on device based state flags. The flags are
* in the userspace exported format.
*/
-int dev_change_flags(struct net_device *dev, unsigned int flags)
+int dev_change_flags(struct net_device *dev, unsigned int flags,
+ struct netlink_ext_ack *extack)
{
int ret;
unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
- ret = __dev_change_flags(dev, flags);
+ ret = __dev_change_flags(dev, flags, extack);
if (ret < 0)
return ret;
@@ -7706,13 +7768,36 @@ void dev_set_group(struct net_device *dev, int new_group)
EXPORT_SYMBOL(dev_set_group);
/**
+ * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
+ * @dev: device
+ * @addr: new address
+ * @extack: netlink extended ack
+ */
+int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_pre_changeaddr_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .dev_addr = addr,
+ };
+ int rc;
+
+ rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info);
+ return notifier_to_errno(rc);
+}
+EXPORT_SYMBOL(dev_pre_changeaddr_notify);
+
+/**
* dev_set_mac_address - Change Media Access Control Address
* @dev: device
* @sa: new address
+ * @extack: netlink extended ack
*
* Change the hardware (MAC) address of the device
*/
-int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
+int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+ struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
int err;
@@ -7723,6 +7808,9 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
+ err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
+ if (err)
+ return err;
err = ops->ndo_set_mac_address(dev, sa);
if (err)
return err;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index d884d8f5f0e5..a6723b306717 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -278,6 +278,103 @@ int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
EXPORT_SYMBOL(__hw_addr_sync_dev);
/**
+ * __hw_addr_ref_sync_dev - Synchronize device's multicast address list taking
+ * into account references
+ * @list: address list to synchronize
+ * @dev: device to sync
+ * @sync: function to call if address or reference on it should be added
+ * @unsync: function to call if address or some reference on it should removed
+ *
+ * This function is intended to be called from the ndo_set_rx_mode
+ * function of devices that require explicit address or references on it
+ * add/remove notifications. The unsync function may be NULL in which case
+ * the addresses or references on it requiring removal will simply be
+ * removed without any notification to the device. That is responsibility of
+ * the driver to identify and distribute address or references on it between
+ * internal address tables.
+ **/
+int __hw_addr_ref_sync_dev(struct netdev_hw_addr_list *list,
+ struct net_device *dev,
+ int (*sync)(struct net_device *,
+ const unsigned char *, int),
+ int (*unsync)(struct net_device *,
+ const unsigned char *, int))
+{
+ struct netdev_hw_addr *ha, *tmp;
+ int err, ref_cnt;
+
+ /* first go through and flush out any unsynced/stale entries */
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ /* sync if address is not used */
+ if ((ha->sync_cnt << 1) <= ha->refcount)
+ continue;
+
+ /* if fails defer unsyncing address */
+ ref_cnt = ha->refcount - ha->sync_cnt;
+ if (unsync && unsync(dev, ha->addr, ref_cnt))
+ continue;
+
+ ha->refcount = (ref_cnt << 1) + 1;
+ ha->sync_cnt = ref_cnt;
+ __hw_addr_del_entry(list, ha, false, false);
+ }
+
+ /* go through and sync updated/new entries to the list */
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ /* sync if address added or reused */
+ if ((ha->sync_cnt << 1) >= ha->refcount)
+ continue;
+
+ ref_cnt = ha->refcount - ha->sync_cnt;
+ err = sync(dev, ha->addr, ref_cnt);
+ if (err)
+ return err;
+
+ ha->refcount = ref_cnt << 1;
+ ha->sync_cnt = ref_cnt;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(__hw_addr_ref_sync_dev);
+
+/**
+ * __hw_addr_ref_unsync_dev - Remove synchronized addresses and references on
+ * it from device
+ * @list: address list to remove synchronized addresses (references on it) from
+ * @dev: device to sync
+ * @unsync: function to call if address and references on it should be removed
+ *
+ * Remove all addresses that were added to the device by
+ * __hw_addr_ref_sync_dev(). This function is intended to be called from the
+ * ndo_stop or ndo_open functions on devices that require explicit address (or
+ * references on it) add/remove notifications. If the unsync function pointer
+ * is NULL then this function can be used to just reset the sync_cnt for the
+ * addresses in the list.
+ **/
+void __hw_addr_ref_unsync_dev(struct netdev_hw_addr_list *list,
+ struct net_device *dev,
+ int (*unsync)(struct net_device *,
+ const unsigned char *, int))
+{
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ if (!ha->sync_cnt)
+ continue;
+
+ /* if fails defer unsyncing address */
+ if (unsync && unsync(dev, ha->addr, ha->sync_cnt))
+ continue;
+
+ ha->refcount -= ha->sync_cnt - 1;
+ ha->sync_cnt = 0;
+ __hw_addr_del_entry(list, ha, false, false);
+ }
+}
+EXPORT_SYMBOL(__hw_addr_ref_unsync_dev);
+
+/**
* __hw_addr_unsync_dev - Remove synchronized addresses from device
* @list: address list to remove synchronized addresses from
* @dev: device to sync
@@ -401,6 +498,9 @@ int dev_addr_add(struct net_device *dev, const unsigned char *addr,
ASSERT_RTNL();
+ err = dev_pre_changeaddr_notify(dev, addr, NULL);
+ if (err)
+ return err;
err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
if (!err)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 90e8aa36881e..31380fd5a4e2 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -234,7 +234,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
switch (cmd) {
case SIOCSIFFLAGS: /* Set interface flags */
- return dev_change_flags(dev, ifr->ifr_flags);
+ return dev_change_flags(dev, ifr->ifr_flags, NULL);
case SIOCSIFMETRIC: /* Set the metric on the interface
(currently unused) */
@@ -246,7 +246,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
case SIOCSIFHWADDR:
if (dev->addr_len > sizeof(struct sockaddr))
return -EINVAL;
- return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
+ return dev_set_mac_address(dev, &ifr->ifr_hwaddr, NULL);
case SIOCSIFHWBROADCAST:
if (ifr->ifr_hwaddr.sa_family != dev->type)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 3a4b29a13d31..abb0da9d7b4b 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2692,6 +2692,11 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME,
.type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
+ .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
+ .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
diff --git a/net/core/filter.c b/net/core/filter.c
index 8d2c629501e2..447dd1bad31f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -296,22 +296,18 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
break;
case SKF_AD_VLAN_TAG:
- case SKF_AD_VLAN_TAG_PRESENT:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
- BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
offsetof(struct sk_buff, vlan_tci));
- if (skb_field == SKF_AD_VLAN_TAG) {
- *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
- ~VLAN_TAG_PRESENT);
- } else {
- /* dst_reg >>= 12 */
- *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
- /* dst_reg &= 1 */
+ break;
+ case SKF_AD_VLAN_TAG_PRESENT:
+ *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET());
+ if (PKT_VLAN_PRESENT_BIT)
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT);
+ if (PKT_VLAN_PRESENT_BIT < 7)
*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
- }
break;
}
@@ -467,7 +463,8 @@ static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
bool ldx_off_ok = offset <= S16_MAX;
*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
- *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
+ if (offset)
+ *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
size, 2 + endian + (!ldx_off_ok * 2));
if (ldx_off_ok) {
@@ -2428,6 +2425,174 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = {
.arg4_type = ARG_ANYTHING,
};
+static void sk_msg_shift_left(struct sk_msg *msg, int i)
+{
+ int prev;
+
+ do {
+ prev = i;
+ sk_msg_iter_var_next(i);
+ msg->sg.data[prev] = msg->sg.data[i];
+ } while (i != msg->sg.end);
+
+ sk_msg_iter_prev(msg, end);
+}
+
+static void sk_msg_shift_right(struct sk_msg *msg, int i)
+{
+ struct scatterlist tmp, sge;
+
+ sk_msg_iter_next(msg, end);
+ sge = sk_msg_elem_cpy(msg, i);
+ sk_msg_iter_var_next(i);
+ tmp = sk_msg_elem_cpy(msg, i);
+
+ while (i != msg->sg.end) {
+ msg->sg.data[i] = sge;
+ sk_msg_iter_var_next(i);
+ sge = tmp;
+ tmp = sk_msg_elem_cpy(msg, i);
+ }
+}
+
+BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
+ u32, len, u64, flags)
+{
+ u32 i = 0, l, space, offset = 0;
+ u64 last = start + len;
+ int pop;
+
+ if (unlikely(flags))
+ return -EINVAL;
+
+ /* First find the starting scatterlist element */
+ i = msg->sg.start;
+ do {
+ l = sk_msg_elem(msg, i)->length;
+
+ if (start < offset + l)
+ break;
+ offset += l;
+ sk_msg_iter_var_next(i);
+ } while (i != msg->sg.end);
+
+ /* Bounds checks: start and pop must be inside message */
+ if (start >= offset + l || last >= msg->sg.size)
+ return -EINVAL;
+
+ space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
+
+ pop = len;
+ /* --------------| offset
+ * -| start |-------- len -------|
+ *
+ * |----- a ----|-------- pop -------|----- b ----|
+ * |______________________________________________| length
+ *
+ *
+ * a: region at front of scatter element to save
+ * b: region at back of scatter element to save when length > A + pop
+ * pop: region to pop from element, same as input 'pop' here will be
+ * decremented below per iteration.
+ *
+ * Two top-level cases to handle when start != offset, first B is non
+ * zero and second B is zero corresponding to when a pop includes more
+ * than one element.
+ *
+ * Then if B is non-zero AND there is no space allocate space and
+ * compact A, B regions into page. If there is space shift ring to
+ * the rigth free'ing the next element in ring to place B, leaving
+ * A untouched except to reduce length.
+ */
+ if (start != offset) {
+ struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
+ int a = start;
+ int b = sge->length - pop - a;
+
+ sk_msg_iter_var_next(i);
+
+ if (pop < sge->length - a) {
+ if (space) {
+ sge->length = a;
+ sk_msg_shift_right(msg, i);
+ nsge = sk_msg_elem(msg, i);
+ get_page(sg_page(sge));
+ sg_set_page(nsge,
+ sg_page(sge),
+ b, sge->offset + pop + a);
+ } else {
+ struct page *page, *orig;
+ u8 *to, *from;
+
+ page = alloc_pages(__GFP_NOWARN |
+ __GFP_COMP | GFP_ATOMIC,
+ get_order(a + b));
+ if (unlikely(!page))
+ return -ENOMEM;
+
+ sge->length = a;
+ orig = sg_page(sge);
+ from = sg_virt(sge);
+ to = page_address(page);
+ memcpy(to, from, a);
+ memcpy(to + a, from + a + pop, b);
+ sg_set_page(sge, page, a + b, 0);
+ put_page(orig);
+ }
+ pop = 0;
+ } else if (pop >= sge->length - a) {
+ sge->length = a;
+ pop -= (sge->length - a);
+ }
+ }
+
+ /* From above the current layout _must_ be as follows,
+ *
+ * -| offset
+ * -| start
+ *
+ * |---- pop ---|---------------- b ------------|
+ * |____________________________________________| length
+ *
+ * Offset and start of the current msg elem are equal because in the
+ * previous case we handled offset != start and either consumed the
+ * entire element and advanced to the next element OR pop == 0.
+ *
+ * Two cases to handle here are first pop is less than the length
+ * leaving some remainder b above. Simply adjust the element's layout
+ * in this case. Or pop >= length of the element so that b = 0. In this
+ * case advance to next element decrementing pop.
+ */
+ while (pop) {
+ struct scatterlist *sge = sk_msg_elem(msg, i);
+
+ if (pop < sge->length) {
+ sge->length -= pop;
+ sge->offset += pop;
+ pop = 0;
+ } else {
+ pop -= sge->length;
+ sk_msg_shift_left(msg, i);
+ }
+ sk_msg_iter_var_next(i);
+ }
+
+ sk_mem_uncharge(msg->sk, len - pop);
+ msg->sg.size -= (len - pop);
+ sk_msg_compute_data_pointers(msg);
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pop_data_proto = {
+ .func = bpf_msg_pop_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
@@ -3908,6 +4073,26 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock,
+ struct bpf_map *, map, u64, flags, void *, data, u64, size)
+{
+ if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
+ return -EINVAL;
+
+ return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
+}
+
+static const struct bpf_func_proto bpf_sockopt_event_output_proto = {
+ .func = bpf_sockopt_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+};
+
BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
int, level, int, optname, char *, optval, int, optlen)
{
@@ -4825,37 +5010,31 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
#ifdef CONFIG_INET
static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
- struct sk_buff *skb, u8 family, u8 proto)
+ int dif, int sdif, u8 family, u8 proto)
{
bool refcounted = false;
struct sock *sk = NULL;
- int dif = 0;
-
- if (skb->dev)
- dif = skb->dev->ifindex;
if (family == AF_INET) {
__be32 src4 = tuple->ipv4.saddr;