summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-02-03 13:16:55 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-02-03 13:16:55 -0800
commitc80c238a28b8a3b2f35898e9a49d9516f7d54346 (patch)
tree685b29ca3fc7ff6135173eb2cfe064593bcde278
parent6ec4de89b478043aa1c33f89f68f62ebf61b3e43 (diff)
parentedbe69ef2c90fc86998a74b08319a01c508bd497 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) The bnx2x can hang if you give it a GSO packet with a segment size which is too big for the hardware, detect and drop in this case. From Daniel Axtens. 2) Fix some overflows and pointer leaks in xtables, from Dmitry Vyukov. 3) Missing RCU locking in igmp, from Eric Dumazet. 4) Fix RX checksum handling on r8152, it can only checksum UDP and TCP packets. From Hayes Wang. 5) Minor pacing tweak to TCP BBR congestion control, from Neal Cardwell. 6) Missing RCU annotations in cls_u32, from Paolo Abeni. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (30 commits) Revert "defer call to mem_cgroup_sk_alloc()" soreuseport: fix mem leak in reuseport_add_sock() net: qlge: use memmove instead of skb_copy_to_linear_data net: qed: use correct strncpy() size net: cxgb4: avoid memcpy beyond end of source buffer cls_u32: add missing RCU annotation. r8152: set rx mode early when linking on r8152: fix wrong checksum status for received IPv4 packets nfp: fix TLV offset calculation net: pxa168_eth: add netconsole support net: igmp: add a missing rcu locking section ibmvnic: fix firmware version when no firmware level has been provided by the VIOS server vmxnet3: remove redundant initialization of pointer 'rq' lan78xx: remove redundant initialization of pointer 'phydev' net: jme: remove unused initialization of 'rxdesc' rtnetlink: remove check for IFLA_IF_NETNSID rocker: fix possible null pointer dereference in rocker_router_fib_event_work inet: Avoid unitialized variable warning in inet_unhash() net: bridge: Fix uninitialized error in br_fdb_sync_static() openvswitch: Remove padding from packet before L3+ conntrack processing ...
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c18
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c3
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c6
-rw-r--r--drivers/net/ethernet/jme.c2
-rw-r--r--drivers/net/ethernet/marvell/pxa168_eth.c12
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_debug.c6
-rw-r--r--drivers/net/ethernet/qlogic/qlge/qlge_main.c3
-rw-r--r--drivers/net/ethernet/rocker/rocker_main.c18
-rw-r--r--drivers/net/usb/lan78xx.c2
-rw-r--r--drivers/net/usb/r8152.c13
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c6
-rw-r--r--include/linux/skbuff.h16
-rw-r--r--mm/memcontrol.c14
-rw-r--r--net/bridge/br_fdb.c2
-rw-r--r--net/core/rtnetlink.c3
-rw-r--r--net/core/skbuff.c63
-rw-r--r--net/core/sock.c5
-rw-r--r--net/core/sock_reuseport.c35
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_connection_sock.c1
-rw-r--r--net/ipv4/inet_hashtables.c6
-rw-r--r--net/ipv4/ip_sockglue.c14
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c16
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c6
-rw-r--r--net/ipv4/tcp_bbr.c6
-rw-r--r--net/ipv6/ipv6_sockglue.c17
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c18
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c26
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c9
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c9
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c28
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c19
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c35
-rw-r--r--net/netfilter/x_tables.c9
-rw-r--r--net/netfilter/xt_IDLETIMER.c1
-rw-r--r--net/netfilter/xt_LED.c1
-rw-r--r--net/netfilter/xt_limit.c3
-rw-r--r--net/netfilter/xt_nfacct.c1
-rw-r--r--net/netfilter/xt_statistic.c1
-rw-r--r--net/openvswitch/conntrack.c34
-rw-r--r--net/sched/cls_u32.c12
-rw-r--r--net/sched/sch_tbf.c10
44 files changed, 326 insertions, 191 deletions
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 7b08323e3f3d..74fc9af4aadb 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12934,6 +12934,24 @@ static netdev_features_t bnx2x_features_check(struct sk_buff *skb,
struct net_device *dev,
netdev_features_t features)
{
+ /*
+ * A skb with gso_size + header length > 9700 will cause a
+ * firmware panic. Drop GSO support.
+ *
+ * Eventually the upper layer should not pass these packets down.
+ *
+ * For speed, if the gso_size is <= 9000, assume there will
+ * not be 700 bytes of headers and pass it through. Only do a
+ * full (slow) validation if the gso_size is > 9000.
+ *
+ * (Due to the way SKB_BY_FRAGS works this will also do a full
+ * validation in that case.)
+ */
+ if (unlikely(skb_is_gso(skb) &&
+ (skb_shinfo(skb)->gso_size > 9000) &&
+ !skb_gso_validate_mac_len(skb, 9700)))
+ features &= ~NETIF_F_GSO_MASK;
+
features = vlan_features_check(skb, features);
return vxlan_features_check(skb, features);
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 1d37672902da..a14e8db51cdc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -355,7 +355,7 @@ struct cxgb4_lld_info {
};
struct cxgb4_uld_info {
- const char *name;
+ char name[IFNAMSIZ];
void *handle;
unsigned int nrxq;
unsigned int rxq_size;
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index c36c81959198..d81e2d37bc3d 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -991,9 +991,8 @@ static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
{
u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
struct device *dev = &adapter->pdev->dev;
- struct be_queue_info *txq = &txo->q;
bool map_single = false;
- u32 head = txq->head;
+ u32 head;
dma_addr_t busaddr;
int len;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 56d6f1a4205a..5caaa9033841 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -3305,7 +3305,11 @@ static void handle_vpd_rsp(union ibmvnic_crq *crq,
*/
substr = strnstr(adapter->vpd->buff, "RM", adapter->vpd->len);
if (!substr) {
- dev_info(dev, "No FW level provided by VPD\n");
+ dev_info(dev, "Warning - No FW level has been provided in the VPD buffer by the VIOS Server\n");
+ ptr = strncpy((char *)adapter->fw_version, "N/A",
+ 3 * sizeof(char));
+ if (!ptr)
+ dev_err(dev, "Failed to inform that firmware version is unavailable to the adapter\n");
goto complete;
}
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 62d848df26ef..8a165842fa85 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -1071,7 +1071,7 @@ static int
jme_process_receive(struct jme_adapter *jme, int limit)
{
struct jme_ring *rxring = &(jme->rxring[0]);
- struct rxdesc *rxdesc = rxring->desc;
+ struct rxdesc *rxdesc;
int i, j, ccnt, desccnt, mask = jme->rx_ring_mask;
if (unlikely(!atomic_dec_and_test(&jme->rx_cleaning)))
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index 7bbd86f08e5f..3a9730612a70 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1362,6 +1362,15 @@ static int pxa168_eth_do_ioctl(struct net_device *dev, struct ifreq *ifr,
return -EOPNOTSUPP;
}
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void pxa168_eth_netpoll(struct net_device *dev)
+{
+ disable_irq(dev->irq);
+ pxa168_eth_int_handler(dev->irq, dev);
+ enable_irq(dev->irq);
+}
+#endif
+
static void pxa168_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
@@ -1390,6 +1399,9 @@ static const struct net_device_ops pxa168_eth_netdev_ops = {
.ndo_do_ioctl = pxa168_eth_do_ioctl,
.ndo_change_mtu = pxa168_eth_change_mtu,
.ndo_tx_timeout = pxa168_eth_tx_timeout,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = pxa168_eth_netpoll,
+#endif
};
static int pxa168_eth_probe(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c
index ffb402746ad4..1f9149bb2ae6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c
@@ -65,7 +65,7 @@ int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,
u32 hdr = readl(data);
length = FIELD_GET(NFP_NET_CFG_TLV_HEADER_LENGTH, hdr);
- offset = data - ctrl_mem + NFP_NET_CFG_TLV_BASE;
+ offset = data - ctrl_mem;
/* Advance past the header */
data += 4;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index f2633ec87a6a..fdf37abee3d3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -3649,10 +3649,8 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn,
BIT(big_ram->is_256b_bit_offset[dev_data->chip_id]) ? 256
: 128;
- strncpy(type_name, big_ram->instance_name,
- strlen(big_ram->instance_name));
- strncpy(mem_name, big_ram->instance_name,
- strlen(big_ram->instance_name));
+ strscpy(type_name, big_ram->instance_name, sizeof(type_name));
+ strscpy(mem_name, big_ram->instance_name, sizeof(mem_name));
/* Dump memory header */
offset += qed_grc_dump_mem_hdr(p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 7b97a9969046..50038d946ced 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -1747,8 +1747,7 @@ static void ql_realign_skb(struct sk_buff *skb, int len)
*/
skb->data -= QLGE_SB_PAD - NET_IP_ALIGN;
skb->tail -= QLGE_SB_PAD - NET_IP_ALIGN;
- skb_copy_to_linear_data(skb, temp_addr,
- (unsigned int)len);
+ memmove(skb->data, temp_addr, len);
}
/*
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index fc8f8bdf6579..056cb6093630 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2902,6 +2902,12 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_alloc_ordered_workqueue;
}
+ err = rocker_probe_ports(rocker);
+ if (err) {
+ dev_err(&pdev->dev, "failed to probe ports\n");
+ goto err_probe_ports;
+ }
+
/* Only FIBs pointing to our own netdevs are programmed into
* the device, so no need to pass a callback.
*/
@@ -2918,22 +2924,16 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
rocker->hw.id = rocker_read64(rocker, SWITCH_ID);
- err = rocker_probe_ports(rocker);
- if (err) {
- dev_err(&pdev->dev, "failed to probe ports\n");
- goto err_probe_ports;
- }
-
dev_info(&pdev->dev, "Rocker switch with id %*phN\n",
(int)sizeof(rocker->hw.id), &rocker->hw.id);
return 0;
-err_probe_ports:
- unregister_switchdev_notifier(&rocker_switchdev_notifier);
err_register_switchdev_notifier:
unregister_fib_notifier(&rocker->fib_nb);
err_register_fib_notifier:
+ rocker_remove_ports(rocker);
+err_probe_ports:
destroy_workqueue(rocker->rocker_owq);
err_alloc_ordered_workqueue:
free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
@@ -2961,9 +2961,9 @@ static void rocker_remove(struct pci_dev *pdev)
{
struct rocker *rocker = pci_get_drvdata(pdev);
- rocker_remove_ports(rocker);
unregister_switchdev_notifier(&rocker_switchdev_notifier);
unregister_fib_notifier(&rocker->fib_nb);
+ rocker_remove_ports(rocker);
rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
destroy_workqueue(rocker->rocker_owq);
free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index ec56ff29aac4..60a604cc7647 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2006,7 +2006,7 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
{
int ret;
u32 mii_adv;
- struct phy_device *phydev = dev->net->phydev;
+ struct phy_device *phydev;
phydev = phy_find_first(dev->mdiobus);
if (!phydev) {
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 0657203ffb91..958b2e8b90f6 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -1848,11 +1848,9 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc)
if (opts2 & RD_IPV4_CS) {
if (opts3 & IPF)
checksum = CHECKSUM_NONE;
- else if ((opts2 & RD_UDP_CS) && (opts3 & UDPF))
- checksum = CHECKSUM_NONE;
- else if ((opts2 & RD_TCP_CS) && (opts3 & TCPF))
- checksum = CHECKSUM_NONE;
- else
+ else if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF))
+ checksum = CHECKSUM_UNNECESSARY;
+ else if ((opts2 & RD_TCP_CS) && !(opts3 & TCPF))
checksum = CHECKSUM_UNNECESSARY;
} else if (opts2 & RD_IPV6_CS) {
if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF))
@@ -3797,11 +3795,12 @@ static void set_carrier(struct r8152 *tp)
if (speed & LINK_STATUS) {
if (!netif_carrier_ok(netdev)) {
tp->rtl_ops.enable(tp);
- set_bit(RTL8152_SET_RX_MODE, &tp->flags);
netif_stop_queue(netdev);
napi_disable(napi);
netif_carrier_on(netdev);
rtl_start_rx(tp);
+ clear_bit(RTL8152_SET_RX_MODE, &tp->flags);
+ _rtl8152_set_rx_mode(netdev);
napi_enable(&tp->napi);
netif_wake_queue(netdev);
netif_info(tp, link, netdev, "carrier on\n");
@@ -4261,7 +4260,7 @@ static int rtl8152_post_reset(struct usb_interface *intf)
mutex_lock(&tp->control);
tp->rtl_ops.enable(tp);
rtl_start_rx(tp);
- rtl8152_set_rx_mode(netdev);
+ _rtl8152_set_rx_mode(netdev);
mutex_unlock(&tp->control);
}
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index cf95290b160c..8b39c160743d 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2760,9 +2760,6 @@ static void
vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
{
size_t sz, i, ring0_size, ring1_size, comp_size;
- struct vmxnet3_rx_queue *rq = &adapter->rx_queue[0];
-
-
if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
VMXNET3_MAX_ETH_HDR_SIZE) {
adapter->skb_buf_size = adapter->netdev->mtu +
@@ -2794,7 +2791,8 @@ vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
comp_size = ring0_size + ring1_size;
for (i = 0; i < adapter->num_rx_queues; i++) {
- rq = &adapter->rx_queue[i];
+ struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
+
rq->rx_ring[0].size = ring0_size;
rq->rx_ring[1].size = ring1_size;
rq->comp_ring.size = comp_size;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ac89a93b7c83..5ebc0f869720 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3287,6 +3287,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
void skb_scrub_packet(struct sk_buff *skb, bool xnet);
unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu);
+bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len);
struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
int skb_ensure_writable(struct sk_buff *skb, int write_len);
@@ -4120,6 +4121,21 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
return hdr_len + skb_gso_transport_seglen(skb);
}
+/**
+ * skb_gso_mac_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_mac_seglen is used to determine the real size of the
+ * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
+ * headers (TCP/UDP).
+ */
+static inline unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
+{
+ unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+ return hdr_len + skb_gso_transport_seglen(skb);
+}
+
/* Local Checksum Offload.
* Compute outer checksum based on the assumption that the
* inner checksum will be offloaded later.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0ae2dc3a1748..0937f2c52c7d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5747,6 +5747,20 @@ void mem_cgroup_sk_alloc(struct sock *sk)
if (!mem_cgroup_sockets_enabled)
return;
+ /*
+ * Socket cloning can throw us here with sk_memcg already
+ * filled. It won't however, necessarily happen from
+ * process context. So the test for root memcg given
+ * the current task's memcg won't help us in this case.
+ *
+ * Respecting the original socket's memcg is a better
+ * decision in this case.
+ */
+ if (sk->sk_memcg) {
+ css_get(&sk->sk_memcg->css);
+ return;
+ }
+
rcu_read_lock();
memcg = mem_cgroup_from_task(current);
if (memcg == root_mem_cgroup)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index dc87fbc9a23b..d9e69e4514be 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -993,7 +993,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p)
{
struct net_bridge_fdb_entry *f, *tmp;
- int err;
+ int err = 0;
ASSERT_RTNL();
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 204297dffd2a..56af8e41abfc 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2802,9 +2802,6 @@ replay:
if (err < 0)
return err;
- if (tb[IFLA_IF_NETNSID])
- return -EOPNOTSUPP;
-
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 01e8285aea73..8c61c27c1b28 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4914,37 +4914,74 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
/**
- * skb_gso_validate_mtu - Return in case such skb fits a given MTU
+ * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
*
- * @skb: GSO skb
- * @mtu: MTU to validate against
+ * There are a couple of instances where we have a GSO skb, and we
+ * want to determine what size it would be after it is segmented.
*
- * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU
- * once split.
+ * We might want to check:
+ * - L3+L4+payload size (e.g. IP forwarding)
+ * - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
+ *
+ * This is a helper to do that correctly considering GSO_BY_FRAGS.
+ *
+ * @seg_len: The segmented length (from skb_gso_*_seglen). In the
+ * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
+ *
+ * @max_len: The maximum permissible length.
+ *
+ * Returns true if the segmented length <= max length.
*/
-bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu)
-{
+static inline bool skb_gso_size_check(const struct sk_buff *skb,
+ unsigned int seg_len,
+ unsigned int max_len) {
const struct skb_shared_info *shinfo = skb_shinfo(skb);
const struct sk_buff *iter;
- unsigned int hlen;
-
- hlen = skb_gso_network_seglen(skb);
if (shinfo->gso_size != GSO_BY_FRAGS)
- return hlen <= mtu;
+ return seg_len <= max_len;
/* Undo this so we can re-use header sizes */
- hlen -= GSO_BY_FRAGS;
+ seg_len -= GSO_BY_FRAGS;
skb_walk_frags(skb, iter) {
- if (hlen + skb_headlen(iter) > mtu)
+ if (seg_len + skb_headlen(iter) > max_len)
return false;
}
return true;
}
+
+/**
+ * skb_gso_validate_mtu - Return in case such skb fits a given MTU
+ *
+ * @skb: GSO skb
+ * @mtu: MTU to validate against
+ *
+ * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU
+ * once split.
+ */
+bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
+}
EXPORT_SYMBOL_GPL(skb_gso_validate_mtu);
+/**
+ * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
+ *
+ * @skb: GSO skb
+ * @len: length to validate against
+ *
+ * skb_gso_validate_mac_len validates if a given skb will fit a wanted
+ * length once split, including L2, L3 and L4 headers and the payload.
+ */
+bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len)
+{
+ return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len);
+}
+EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
+
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
if (skb_cow(skb, skb_headroom(skb)) < 0) {
diff --git a/net/core/sock.c b/net/core/sock.c
index 1033f8ab0547..e50e7b3f2223 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1683,16 +1683,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
-
- /* sk->sk_memcg will be populated at accept() time */
- newsk->sk_memcg = NULL;
-
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
+ mem_cgroup_sk_alloc(newsk);
cgroup_sk_alloc(&newsk->sk_cgrp_data);
rcu_read_lock();
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index c5bb52bc73a1..064acb04be0f 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -94,6 +94,16 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
return more_reuse;
}
+static void reuseport_free_rcu(struct rcu_head *head)
+{
+ struct sock_reuseport *reuse;
+
+ reuse = container_of(head, struct sock_reuseport, rcu);
+ if (reuse->prog)
+ bpf_prog_destroy(reuse->prog);
+ kfree(reuse);
+}
+
/**
* reuseport_add_sock - Add a socket to the reuseport group of another.
* @sk: New socket to add to the group.
@@ -102,7 +112,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
*/
int reuseport_add_sock(struct sock *sk, struct sock *sk2)
{
- struct sock_reuseport *reuse;
+ struct sock_reuseport *old_reuse, *reuse;
if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
int err = reuseport_alloc(sk2);
@@ -113,10 +123,13 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2)
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- "socket already in reuseport group");
+ lockdep_is_held(&reuseport_lock));
+ old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ if (old_reuse && old_reuse->num_socks != 1) {
+ spin_unlock_bh(&reuseport_lock);
+ return -EBUSY;
+ }
if (reuse->num_socks == reuse->max_socks) {
reuse = reuseport_grow(reuse);
@@ -134,19 +147,11 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2)
spin_unlock_bh(&reuseport_lock);
+ if (old_reuse)
+ call_rcu(&old_reuse->rcu, reuseport_free_rcu);
return 0;
}
-static void reuseport_free_rcu(struct rcu_head *head)
-{
- struct sock_reuseport *reuse;
-
- reuse = container_of(head, struct sock_reuseport, rcu);
- if (reuse->prog)
- bpf_prog_destroy(reuse->prog);
- kfree(reuse);
-}
-
void reuseport_detach_sock(struct sock *sk)
{
struct sock_reuseport *reuse;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 10f7f74a0831..f2402581fef1 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -386,7 +386,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
pip->frag_off = htons(IP_DF);
pip->ttl = 1;
pip->daddr = fl4.daddr;
+
+ rcu_read_lock();
pip->saddr = igmpv3_get_srcaddr(dev, &fl4);
+ rcu_read_unlock();
+
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
ip_select_ident(net, skb, NULL);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 12410ec6f7f7..881ac6d046f2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -475,7 +475,6 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
}