summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-03-23 11:29:49 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-03-23 11:29:49 -0700
commitf341d9f08ae01d90d8d0c135ae2edf4423e724c9 (patch)
treec9e9db67b586df67157a52096d787d67c4440a55 /net
parent093b995e3b55a0ae0670226ddfcb05bfbf0099ae (diff)
parent68c386590375b2aea5a3154f17882a30170707bf (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) Several netfilter fixes from Pablo and the crew: - Handle fragmented packets properly in netfilter conntrack, from Florian Westphal. - Fix SCTP ICMP packet handling, from Ying Xue. - Fix big-endian bug in nftables, from Liping Zhang. - Fix alignment of fake conntrack entry, from Steven Rostedt. 2) Fix feature flags setting in fjes driver, from Taku Izumi. 3) Openvswitch ipv6 tunnel source address not set properly, from Or Gerlitz. 4) Fix jumbo MTU handling in amd-xgbe driver, from Thomas Lendacky. 5) sk->sk_frag.page not released properly in some cases, from Eric Dumazet. 6) Fix RTNL deadlocks in nl80211, from Johannes Berg. 7) Fix erroneous RTNL lockdep splat in crypto, from Herbert Xu. 8) Cure improper inflight handling during AF_UNIX GC, from Andrey Ulanov. 9) sch_dsmark doesn't write to packet headers properly, from Eric Dumazet. 10) Fix SCM_TIMESTAMPING_OPT_STATS handling in TCP, from Soheil Hassas Yeganeh. 11) Add some IDs for Motorola qmi_wwan chips, from Tony Lindgren. 12) Fix nametbl deadlock in tipc, from Ying Xue. 13) GRO and LRO packets not counted correctly in mlx5 driver, from Gal Pressman. 14) Fix reset of internal PHYs in bcmgenet, from Doug Berger. 15) Fix hashmap allocation handling, from Alexei Starovoitov. 16) nl_fib_input() needs stronger netlink message length checking, from Eric Dumazet. 17) Fix double-free of sk->sk_filter during sock clone, from Daniel Borkmann. 18) Fix RX checksum offloading in aquantia driver, from Pavel Belous. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (85 commits) net:ethernet:aquantia: Fix for RX checksum offload. amd-xgbe: Fix the ECC-related bit position definitions sfc: cleanup a condition in efx_udp_tunnel_del() Bluetooth: btqcomsmd: fix compile-test dependency inet: frag: release spinlock before calling icmp_send() tcp: initialize icsk_ack.lrcvtime at session start time genetlink: fix counting regression on ctrl_dumpfamily() socket, bpf: fix sk_filter use after free in sk_clone_lock ipv4: provide stronger user input validation in nl_fib_input() bpf: fix hashmap extra_elems logic enic: update enic maintainers net: bcmgenet: remove bcmgenet_internal_phy_setup() ipv6: make sure to initialize sockc.tsflags before first use fjes: Do not load fjes driver if extended socket device is not power on. fjes: Do not load fjes driver if system does not have extended socket device. net/mlx5e: Count LRO packets correctly net/mlx5e: Count GSO packets correctly net/mlx5: Increase number of max QPs in default profile net/mlx5e: Avoid supporting udp tunnel port ndo for VF reps net/mlx5e: Use the proper UAPI values when offloading TC vlan actions ...
Diffstat (limited to 'net')
-rw-r--r--net/batman-adv/bat_iv_ogm.c11
-rw-r--r--net/batman-adv/bat_v.c14
-rw-r--r--net/batman-adv/fragmentation.c20
-rw-r--r--net/batman-adv/gateway_common.c5
-rw-r--r--net/batman-adv/soft-interface.c1
-rw-r--r--net/batman-adv/types.h2
-rw-r--r--net/bridge/br_fdb.c2
-rw-r--r--net/bridge/br_netfilter_hooks.c12
-rw-r--r--net/bridge/br_private.h9
-rw-r--r--net/core/netclassid_cgroup.c32
-rw-r--r--net/core/skbuff.c27
-rw-r--r--net/core/sock.c16
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/ip_fragment.c25
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c5
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c8
-rw-r--r--net/ipv4/netfilter/nft_redir_ipv4.c8
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_minisocks.c1
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c8
-rw-r--r--net/ipv6/netfilter/nft_redir_ipv6.c8
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/mpls/af_mpls.c13
-rw-r--r--net/netfilter/nf_conntrack_core.c6
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c13
-rw-r--r--net/netfilter/nf_tables_api.c4
-rw-r--r--net/netfilter/nft_ct.c21
-rw-r--r--net/netfilter/nft_meta.c40
-rw-r--r--net/netfilter/nft_nat.c8
-rw-r--r--net/netfilter/nft_set_bitmap.c165
-rw-r--r--net/netlink/af_netlink.c41
-rw-r--r--net/netlink/genetlink.c4
-rw-r--r--net/openvswitch/flow_netlink.c4
-rw-r--r--net/rxrpc/conn_event.c4
-rw-r--r--net/sched/sch_dsmark.c10
-rw-r--r--net/sctp/associola.c6
-rw-r--r--net/sctp/output.c7
-rw-r--r--net/sctp/outqueue.c11
-rw-r--r--net/socket.c13
-rw-r--r--net/tipc/subscr.c7
-rw-r--r--net/unix/garbage.c17
-rw-r--r--net/vmw_vsock/af_vsock.c14
-rw-r--r--net/vmw_vsock/virtio_transport.c42
-rw-r--r--net/vmw_vsock/virtio_transport_common.c7
-rw-r--r--net/wireless/nl80211.c127
48 files changed, 502 insertions, 312 deletions
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 7c3d994e90d8..71343d0fec94 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -2477,6 +2477,16 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
batadv_iv_ogm_schedule(hard_iface);
}
+/**
+ * batadv_iv_init_sel_class - initialize GW selection class
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv)
+{
+ /* set default TQ difference threshold to 20 */
+ atomic_set(&bat_priv->gw.sel_class, 20);
+}
+
static struct batadv_gw_node *
batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
{
@@ -2823,6 +2833,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
.del_if = batadv_iv_ogm_orig_del_if,
},
.gw = {
+ .init_sel_class = batadv_iv_init_sel_class,
.get_best_gw_node = batadv_iv_gw_get_best_gw_node,
.is_eligible = batadv_iv_gw_is_eligible,
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 0acd081dd286..a36c8e7291d6 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -668,6 +668,16 @@ err_ifinfo1:
return ret;
}
+/**
+ * batadv_v_init_sel_class - initialize GW selection class
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_v_init_sel_class(struct batadv_priv *bat_priv)
+{
+ /* set default throughput difference threshold to 5Mbps */
+ atomic_set(&bat_priv->gw.sel_class, 50);
+}
+
static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv,
char *buff, size_t count)
{
@@ -1052,6 +1062,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
.dump = batadv_v_orig_dump,
},
.gw = {
+ .init_sel_class = batadv_v_init_sel_class,
.store_sel_class = batadv_v_store_sel_class,
.show_sel_class = batadv_v_show_sel_class,
.get_best_gw_node = batadv_v_gw_get_best_gw_node,
@@ -1092,9 +1103,6 @@ int batadv_v_mesh_init(struct batadv_priv *bat_priv)
if (ret < 0)
return ret;
- /* set default throughput difference threshold to 5Mbps */
- atomic_set(&bat_priv->gw.sel_class, 50);
-
return 0;
}
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 11a23fd6e1a0..8f964beaac28 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -404,7 +404,7 @@ out:
* batadv_frag_create - create a fragment from skb
* @skb: skb to create fragment from
* @frag_head: header to use in new fragment
- * @mtu: size of new fragment
+ * @fragment_size: size of new fragment
*
* Split the passed skb into two fragments: A new one with size matching the
* passed mtu and the old one with the rest. The new skb contains data from the
@@ -414,11 +414,11 @@ out:
*/
static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
struct batadv_frag_packet *frag_head,
- unsigned int mtu)
+ unsigned int fragment_size)
{
struct sk_buff *skb_fragment;
unsigned int header_size = sizeof(*frag_head);
- unsigned int fragment_size = mtu - header_size;
+ unsigned int mtu = fragment_size + header_size;
skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
if (!skb_fragment)
@@ -456,7 +456,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
struct sk_buff *skb_fragment;
unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
unsigned int header_size = sizeof(frag_header);
- unsigned int max_fragment_size, max_packet_size;
+ unsigned int max_fragment_size, num_fragments;
int ret;
/* To avoid merge and refragmentation at next-hops we never send
@@ -464,10 +464,15 @@ int batadv_frag_send_packet(struct sk_buff *skb,
*/
mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
max_fragment_size = mtu - header_size;
- max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
+
+ if (skb->len == 0 || max_fragment_size == 0)
+ return -EINVAL;
+
+ num_fragments = (skb->len - 1) / max_fragment_size + 1;
+ max_fragment_size = (skb->len - 1) / num_fragments + 1;
/* Don't even try to fragment, if we need more than 16 fragments */
- if (skb->len > max_packet_size) {
+ if (num_fragments > BATADV_FRAG_MAX_FRAGMENTS) {
ret = -EAGAIN;
goto free_skb;
}
@@ -507,7 +512,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
goto put_primary_if;
}
- skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
+ skb_fragment = batadv_frag_create(skb, &frag_header,
+ max_fragment_size);
if (!skb_fragment) {
ret = -ENOMEM;
goto put_primary_if;
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 5db2e43e3775..33940c5c74a8 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -253,6 +253,11 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
*/
void batadv_gw_init(struct batadv_priv *bat_priv)
{
+ if (bat_priv->algo_ops->gw.init_sel_class)
+ bat_priv->algo_ops->gw.init_sel_class(bat_priv);
+ else
+ atomic_set(&bat_priv->gw.sel_class, 1);
+
batadv_tvlv_handler_register(bat_priv, batadv_gw_tvlv_ogm_handler_v1,
NULL, BATADV_TVLV_GW, 1,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 5d099b2e6cfc..d042c99af028 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -819,7 +819,6 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0);
#endif
atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF);
- atomic_set(&bat_priv->gw.sel_class, 20);
atomic_set(&bat_priv->gw.bandwidth_down, 100);
atomic_set(&bat_priv->gw.bandwidth_up, 20);
atomic_set(&bat_priv->orig_interval, 1000);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 66b25e410a41..246f21b4973b 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1489,6 +1489,7 @@ struct batadv_algo_orig_ops {
/**
* struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific)
+ * @init_sel_class: initialize GW selection class (optional)
* @store_sel_class: parse and stores a new GW selection class (optional)
* @show_sel_class: prints the current GW selection class (optional)
* @get_best_gw_node: select the best GW from the list of available nodes
@@ -1499,6 +1500,7 @@ struct batadv_algo_orig_ops {
* @dump: dump gateways to a netlink socket (optional)
*/
struct batadv_algo_gw_ops {
+ void (*init_sel_class)(struct batadv_priv *bat_priv);
ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
size_t count);
ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 4f598dc2d916..6e08b7199dd7 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -106,7 +106,7 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br,
struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
- WARN_ON_ONCE(!br_hash_lock_held(br));
+ lockdep_assert_held_once(&br->hash_lock);
rcu_read_lock();
fdb = fdb_find_rcu(head, addr, vid);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index fa87fbd62bb7..1f1e62095464 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -706,18 +706,20 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge;
- unsigned int mtu_reserved;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ unsigned int mtu, mtu_reserved;
mtu_reserved = nf_bridge_mtu_reduction(skb);
+ mtu = skb->dev->mtu;
+
+ if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
+ mtu = nf_bridge->frag_max_size;
- if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
+ if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb);
}
- nf_bridge = nf_bridge_info_get(skb);
-
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2288fca7756c..61368186edea 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -531,15 +531,6 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid);
-static inline bool br_hash_lock_held(struct net_bridge *br)
-{
-#ifdef CONFIG_LOCKDEP
- return lockdep_is_held(&br->hash_lock);
-#else
- return true;
-#endif
-}
-
/* br_forward.c */
enum br_pkt_type {
BR_PKT_UNICAST,
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 6ae56037bb13..029a61ac6cdd 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -71,27 +71,17 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
return 0;
}
-static void update_classid(struct cgroup_subsys_state *css, void *v)
+static void cgrp_attach(struct cgroup_taskset *tset)
{
- struct css_task_iter it;
+ struct cgroup_subsys_state *css;
struct task_struct *p;
- css_task_iter_start(css, &it);
- while ((p = css_task_iter_next(&it))) {
+ cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock, v);
+ iterate_fd(p->files, 0, update_classid_sock,
+ (void *)(unsigned long)css_cls_state(css)->classid);
task_unlock(p);
}
- css_task_iter_end(&it);
-}
-
-static void cgrp_attach(struct cgroup_taskset *tset)
-{
- struct cgroup_subsys_state *css;
-
- cgroup_taskset_first(tset, &css);
- update_classid(css,
- (void *)(unsigned long)css_cls_state(css)->classid);
}
static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -103,12 +93,22 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
u64 value)
{
struct cgroup_cls_state *cs = css_cls_state(css);
+ struct css_task_iter it;
+ struct task_struct *p;
cgroup_sk_alloc_disable();
cs->classid = (u32)value;
- update_classid(css, (void *)(unsigned long)cs->classid);
+ css_task_iter_start(css, &it);
+ while ((p = css_task_iter_next(&it))) {
+ task_lock(p);
+ iterate_fd(p->files, 0, update_classid_sock,
+ (void *)(unsigned long)cs->classid);
+ task_unlock(p);
+ }
+ css_task_iter_end(&it);
+
return 0;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cd4ba8c6b609..9f781092fda9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3694,6 +3694,15 @@ static void sock_rmem_free(struct sk_buff *skb)
atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}
+static void skb_set_err_queue(struct sk_buff *skb)
+{
+ /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING.
+ * So, it is safe to (mis)use it to mark skbs on the error queue.
+ */
+ skb->pkt_type = PACKET_OUTGOING;
+ BUILD_BUG_ON(PACKET_OUTGOING == 0);
+}
+
/*
* Note: We dont mem charge error packets (no sk_forward_alloc changes)
*/
@@ -3707,6 +3716,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
skb->sk = sk;
skb->destructor = sock_rmem_free;
atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ skb_set_err_queue(skb);
/* before exiting rcu section, make sure dst is refcounted */
skb_dst_force(skb);
@@ -3783,16 +3793,20 @@ EXPORT_SYMBOL(skb_clone_sk);
static void __skb_complete_tx_timestamp(struct sk_buff *skb,
struct sock *sk,
- int tstype)
+ int tstype,
+ bool opt_stats)
{
struct sock_exterr_skb *serr;
int err;
+ BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
+
serr = SKB_EXT_ERR(skb);
memset(serr, 0, sizeof(*serr));
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = tstype;
+ serr->opt_stats = opt_stats;
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk->sk_protocol == IPPROTO_TCP &&
@@ -3833,7 +3847,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
*/
if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
*skb_hwtstamps(skb) = *hwtstamps;
- __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
+ __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
sock_put(sk);
}
}
@@ -3844,7 +3858,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
struct sock *sk, int tstype)
{
struct sk_buff *skb;
- bool tsonly;
+ bool tsonly, opt_stats = false;
if (!sk)
return;
@@ -3857,9 +3871,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
#ifdef CONFIG_INET
if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
sk->sk_protocol == IPPROTO_TCP &&
- sk->sk_type == SOCK_STREAM)
+ sk->sk_type == SOCK_STREAM) {
skb = tcp_get_timestamping_opt_stats(sk);
- else
+ opt_stats = true;
+ } else
#endif
skb = alloc_skb(0, GFP_ATOMIC);
} else {
@@ -3878,7 +3893,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
else
skb->tstamp = ktime_get_real();
- __skb_complete_tx_timestamp(skb, sk, tstype);
+ __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
}
EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
diff --git a/net/core/sock.c b/net/core/sock.c
index a96d5f7a5734..2c4f574168fb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1442,6 +1442,11 @@ static void __sk_destruct(struct rcu_head *head)
pr_debug("%s: optmem leakage (%d bytes) detected\n",
__func__, atomic_read(&sk->sk_omem_alloc));
+ if (sk->sk_frag.page) {
+ put_page(sk->sk_frag.page);
+ sk->sk_frag.page = NULL;
+ }
+
if (sk->sk_peer_cred)
put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
@@ -1539,6 +1544,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
is_charged = sk_filter_charge(newsk, filter);
if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+ /* We need to make sure that we don't uncharge the new
+ * socket if we couldn't charge it in the first place
+ * as otherwise we uncharge the parent's filter.
+ */
+ if (!is_charged)
+ RCU_INIT_POINTER(newsk->sk_filter, NULL);
sk_free_unlock_clone(newsk);
newsk = NULL;
goto out;
@@ -2787,11 +2798,6 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
- if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
- sk->sk_frag.page = NULL;
- }
-
sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 42bfd08109dd..8f2133ffc2ff 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1083,7 +1083,8 @@ static void nl_fib_input(struct sk_buff *skb)
net = sock_net(skb->sk);
nlh = nlmsg_hdr(skb);
- if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
+ if (skb->len < nlmsg_total_size(sizeof(*frn)) ||
+ skb->len < nlh->nlmsg_len ||
nlmsg_len(nlh) < sizeof(*frn))
return;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bbe7f72db9c1..b3cdeec85f1f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -198,6 +198,7 @@ static void ip_expire(unsigned long arg)
qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
+ rcu_read_lock();
spin_lock(&qp->q.lock);
if (qp->q.flags & INET_FRAG_COMPLETE)
@@ -207,7 +208,7 @@ static void ip_expire(unsigned long arg)
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
if (!inet_frag_evicting(&qp->q)) {
- struct sk_buff *head = qp->q.fragments;
+ struct sk_buff *clone, *head = qp->q.fragments;
const struct iphdr *iph;
int err;
@@ -216,32 +217,40 @@ static void ip_expire(unsigned long arg)
if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
goto out;
- rcu_read_lock();
head->dev = dev_get_by_index_rcu(net, qp->iif);
if (!head->dev)
- goto out_rcu_unlock;
+ goto out;
+
/* skb has no dst, perform route lookup again */
iph = ip_hdr(head);
err = ip_route_input_noref(head, iph->daddr, iph->saddr,
iph->tos, head->dev);
if (err)
- goto out_rcu_unlock;
+ goto out;
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
*/
if (frag_expire_skip_icmp(qp->user) &&
(skb_rtable(head)->rt_type != RTN_LOCAL))
- goto out_rcu_unlock;
+ goto out;
+
+ clone = skb_clone(head, GFP_ATOMIC);
/* Send an ICMP "Fragment Reassembly Timeout" message. */
- icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
-out_rcu_unlock:
- rcu_read_unlock();
+ if (clone) {
+ spin_unlock(&qp->q.lock);
+ icmp_send(clone, ICMP_TIME_EXCEEDED,
+ ICMP_EXC_FRAGTIME, 0);
+ consume_skb(clone);
+ goto out_rcu_unlock;
+ }
}
out:
spin_unlock(&qp->q.lock);
+out_rcu_unlock:
+ rcu_read_unlock();
ipq_put(qp);
}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index bc1486f2c064..2e14ed11a35c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -165,6 +165,10 @@ static unsigned int ipv4_conntrack_local(void *priv,
if (skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
+
+ if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
+ return NF_ACCEPT;
+
return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index f8aad03d674b..6f5e8d01b876 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -255,11 +255,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
/* maniptype == SRC for postrouting. */
enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
- /* We never see fragments: conntrack defrags on pre-routing
- * and local-out, and nf_nat_out protects post-routing.
- */
- NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
-
ct = nf_ct_get(skb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would
* have dropped it. Hence it's the user's responsibilty to
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index a0ea8aad1bf1..f18677277119 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -26,10 +26,10 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
memset(&range, 0, sizeof(range));
range.flags = priv->flags;
if (priv->sreg_proto_min) {
- range.min_proto.all =
- *(__be16 *)&regs->data[priv->sreg_proto_min];
- range.max_proto.all =
- *(__be16 *)&regs->data[priv->sreg_proto_max];
+ range.min_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ range.max_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
}
regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
&range, nft_out(pkt));
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
index 1650ed23c15d..5120be1d3118 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -26,10 +26,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr,
memset(&mr, 0, sizeof(mr));
if (priv->sreg_proto_min) {
- mr.range[0].min.all =
- *(__be16 *)&regs->data[priv->sreg_proto_min];
- mr.range[0].max.all =
- *(__be16 *)&regs->data[priv->sreg_proto_max];
+ mr.range[0].min.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ mr.range[0].max.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cf4555581282..1e319a525d51 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2770,7 +2770,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
const struct inet_connection_sock *icsk = inet_csk(sk);
- u32 now = tcp_time_stamp, intv;
+ u32 now, intv;
u64 rate64;
bool slow;