Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Several conflicts here. NFP driver bug fix adding nfp_netdev_is_nfp_repr() check to nfp_fl_output() needed some adjustments because the code block is in an else block now. Parallel additions to net/pkt_cls.h and net/sch_generic.h A bug fix in __tcp_retransmit_skb() conflicted with some of the rbtree changes in net-next. The tc action RCU callback fixes in 'net' had some overlap with some of the recent tcf_block reworking. Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2017-10-30 14:10:01 +0900
committer: David S. Miller <davem@davemloft.net> 2017-10-30 21:09:24 +0900
commit: e1ea2f9856b765a2eaabb403a6751f70efc9ba4c (patch)
tree: 771f0f96fdab1b27757730e96d911c73f5499ee4 /net
parent: aad93c70b9a3b80dbc383a31e77a119f69bdd856 (diff)
parent: 0b07194bb55ed836c2cc7c22e866b87a14681984 (diff)
38 files changed, 591 insertions, 186 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index b79c44cc8145..721c30889033 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1845,14 +1845,15 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
 {
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
+	/* If user passes invalid input drop the packet. */
 	if (unlikely(flags))
-		return SK_ABORTED;
+		return SK_DROP;
 
 	tcb->bpf.key = key;
 	tcb->bpf.flags = flags;
 	tcb->bpf.map = map;
 
-	return SK_REDIRECT;
+	return SK_PASS;
 }
 
 struct sock *do_sk_redirect_map(struct sk_buff *skb)
@@ -4475,6 +4476,31 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
+static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
+				     const struct bpf_insn *si,
+				     struct bpf_insn *insn_buf,
+				     struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+	int off;
+
+	switch (si->off) {
+	case offsetof(struct __sk_buff, data_end):
+		off  = si->off;
+		off -= offsetof(struct __sk_buff, data_end);
+		off += offsetof(struct sk_buff, cb);
+		off += offsetof(struct tcp_skb_cb, bpf.data_end);
+		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+				      si->src_reg, off);
+		break;
+	default:
+		return bpf_convert_ctx_access(type, si, insn_buf, prog,
+					      target_size);
+	}
+
+	return insn - insn_buf;
+}
+
 const struct bpf_verifier_ops sk_filter_verifier_ops = {
 	.get_func_proto		= sk_filter_func_proto,
 	.is_valid_access	= sk_filter_is_valid_access,
@@ -4565,7 +4591,7 @@ const struct bpf_prog_ops sock_ops_prog_ops = {
 const struct bpf_verifier_ops sk_skb_verifier_ops = {
 	.get_func_proto		= sk_skb_func_proto,
 	.is_valid_access	= sk_skb_is_valid_access,
-	.convert_ctx_access	= bpf_convert_ctx_access,
+	.convert_ctx_access	= sk_skb_convert_ctx_access,
 	.gen_prologue		= sk_skb_prologue,
 };
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 0490916864f9..e65fcb45c3f6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -495,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
 							      ireq->ir_rmt_addr);
 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
 					    ireq->ir_rmt_addr,
-					    rcu_dereference(ireq->ireq_opt));
+					    ireq_opt_deref(ireq));
 		err = net_xmit_eval(err);
 	}
 
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index de91c48b6806..4d1ccf87c59c 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -486,14 +486,15 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
 		if (!ethernet)
 			return -EINVAL;
 		ethernet_dev = of_find_net_device_by_node(ethernet);
+		if (!ethernet_dev)
+			return -EPROBE_DEFER;
 	} else {
 		ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]);
+		if (!ethernet_dev)
+			return -EPROBE_DEFER;
 		dev_put(ethernet_dev);
 	}
 
-	if (!ethernet_dev)
-		return -EPROBE_DEFER;
-
 	if (!dst->cpu_dp) {
 		dst->cpu_dp = port;
 		dst->cpu_dp->master = ethernet_dev;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ca03a1dcbc8f..4ca46dc08e63 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -541,7 +541,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
 	struct ip_options_rcu *opt;
 	struct rtable *rt;
 
-	opt = rcu_dereference(ireq->ireq_opt);
+	opt = ireq_opt_deref(ireq);
+
 	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 1e47818e38c7..c891235b4966 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly;
 
 static int ipip_err(struct sk_buff *skb, u32 info)
 {
-
-/* All the routers (except for Linux) return only
-   8 bytes of packet payload. It means, that precise relaying of
-   ICMP in the real Internet is absolutely infeasible.
- */
+	/* All the routers (except for Linux) return only
+	 * 8 bytes of packet payload. It means, that precise relaying of
+	 * ICMP in the real Internet is absolutely infeasible.
+	 */
 	struct net *net = dev_net(skb->dev);
 	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 	const struct iphdr *iph = (const struct iphdr *)skb->data;
-	struct ip_tunnel *t;
-	int err;
 	const int type = icmp_hdr(skb)->type;
 	const int code = icmp_hdr(skb)->code;
+	struct ip_tunnel *t;
+	int err = 0;
+
+	switch (type) {
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+			/* Impossible event. */
+			goto out;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			 * rfc2003 contains "deep thoughts" about NET_UNREACH,
+			 * I believe they are just ether pollution. --ANK
+			 */
+			break;
+		}
+		break;
+
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			goto out;
+		break;
+
+	case ICMP_REDIRECT:
+		break;
+
+	default:
+		goto out;
+	}
 
-	err = -ENOENT;
 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 			     iph->daddr, iph->saddr, 0);
-	if (!t)
+	if (!t) {
+		err = -ENOENT;
 		goto out;
+	}
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-				 t->parms.link, 0, iph->protocol, 0);
-		err = 0;
+		ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
+				 iph->protocol, 0);
 		goto out;
 	}
 
 	if (type == ICMP_REDIRECT) {
-		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
-			      iph->protocol, 0);
-		err = 0;
+		ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
 		goto out;
 	}
 
-	if (t->parms.iph.daddr == 0)
+	if (t->parms.iph.daddr == 0) {
+		err = -ENOENT;
 		goto out;
+	}
 
-	err = 0;
 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 		goto out;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7c1dae6493c3..0162c577bb9c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -881,7 +881,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 
 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
 					    ireq->ir_rmt_addr,
-					    rcu_dereference(ireq->ireq_opt));
+					    ireq_opt_deref(ireq));
 		err = net_xmit_eval(err);
 	}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index aab6e7145013..a69a34f57330 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -781,8 +781,10 @@ static void tcp_tsq_handler(struct sock *sk)
 		struct tcp_sock *tp = tcp_sk(sk);
 
 		if (tp->lost_out > tp->retrans_out &&
-		    tp->snd_cwnd > tcp_packets_in_flight(tp))
+		    tp->snd_cwnd > tcp_packets_in_flight(tp)) {
+			tcp_mstamp_refresh(tp);
 			tcp_xmit_retransmit_queue(sk);
+		}
 
 		tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
 			       0, GFP_ATOMIC);
@@ -2307,6 +2309,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 	sent_pkts = 0;
 
+	tcp_mstamp_refresh(tp);
 	if (!push_one) {
 		/* Do MTU probing. */
 		result = tcp_mtu_probe(sk);
@@ -2318,7 +2321,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	}
 
 	max_segs = tcp_tso_segs(sk, mss_now);
-	tcp_mstamp_refresh(tp);
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
@@ -2911,8 +2913,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 				     -ENOBUFS;
 		} tcp_skb_tsorted_restore(skb);
 
-		if (!err)
+		if (!err) {
 			tcp_update_skb_after_send(tp, skb);
+			tcp_rate_skb_sent(sk, skb);
+		}
 	} else {
 		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 241841fb479c..3e10c51e7e0c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -408,13 +408,16 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	case ICMPV6_DEST_UNREACH:
 		net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
 				    t->parms.name);
-		break;
+		if (code != ICMPV6_PORT_UNREACH)
+			break;
+		return;
 	case ICMPV6_TIME_EXCEED:
 		if (code == ICMPV6_EXC_HOPLIMIT) {
 			net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
 					    t->parms.name);
+			break;
 		}
-		break;
+		return;
 	case ICMPV6_PARAMPROB:
 		teli = 0;
 		if (code == ICMPV6_HDR_FIELD)
@@ -430,7 +433,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
 					    t->parms.name);
 		}
-		break;
+		return;
 	case ICMPV6_PKT_TOOBIG:
 		mtu = be32_to_cpu(info) - offset - t->tun_hlen;
 		if (t->dev->type == ARPHRD_ETHER)
@@ -438,7 +441,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (mtu < IPV6_MIN_MTU)
 			mtu = IPV6_MIN_MTU;
 		t->dev->mtu = mtu;
-		break;
+		return;
 	}
 
 	if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
@@ -500,8 +503,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 			       __u32 *pmtu, __be16 proto)
 {
 	struct ip6_tnl *tunnel = netdev_priv(dev);
-	__be16 protocol = (dev->type == ARPHRD_ETHER) ?
-			  htons(ETH_P_TEB) : proto;
+	struct dst_entry *dst = skb_dst(skb);
+	__be16 protocol;
 
 	if (dev->type == ARPHRD_ETHER)
 		IPCB(skb)->flags = 0;
@@ -515,9 +518,14 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 		tunnel->o_seqno++;
 
 	/* Push GRE header. */
+	protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
 	gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
 			 protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
 
+	/* TooBig packet may have updated dst->dev's mtu */
+	if (dst && dst_mtu(dst) > dst->dev->mtu)
+		dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+
 	return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
 			    NEXTHDR_GRE);
 }
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a354f1939e49..fb15d3b97cb2 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2727,12 +2727,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
 	if (!ieee80211_sdata_running(sdata))
 		return -ENETDOWN;
 
-	if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
-		ret = drv_set_bitrate_mask(local, sdata, mask);
-		if (ret)
-			return ret;
-	}
-
 	/*
 	 * If active validate the setting and reject it if it doesn't leave
 	 * at least one basic rate usable, since we really have to be able
@@ -2748,6 +2742,12 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
 			return -EINVAL;
 	}
 
+	if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
+		ret = drv_set_bitrate_mask(local, sdata, mask);
+		if (ret)
+			return ret;
+	}
+
 	for (i = 0; i < NUM_NL80211_BANDS; i++) {
 		struct ieee80211_supported_band *sband = wiphy->bands[i];
 		int j;
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index ae995c8480db..938049395f90 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 #include <net/mac80211.h>
+#include <crypto/algapi.h>
 #include <asm/unaligned.h>
 #include "ieee80211_i.h"
 #include "driver-ops.h"
@@ -609,6 +610,39 @@ void ieee80211_key_free_unused(struct ieee80211_key *key)
 	ieee80211_key_free_common(key);
 }
 
+static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata,
+				    struct ieee80211_key *old,
+				    struct ieee80211_key *new)
+{
+	u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP];
+	u8 *tk_old, *tk_new;
+
+	if (!old || new->conf.keylen != old->conf.keylen)
+		return false;
+
+	tk_old = old->conf.key;
+	tk_new = new->conf.key;
+
+	/*
+	 * In station mode, don't compare the TX MIC key, as it's never used
+	 * and offloaded rekeying may not care to send it to the host. This
+	 * is the case in iwlwifi, for example.
+	 */
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    new->conf.cipher == WLAN_CIPHER_SUITE_TKIP &&
+	    new->conf.keylen == WLAN_KEY_LEN_TKIP &&
+	    !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
+		memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP);
+		memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP);
+		memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+		memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+		tk_old = tkip_old;
+		tk_new = tkip_new;
+	}
+
+	return !crypto_memneq(tk_old, tk_new, new->conf.keylen);
+}
+
 int ieee80211_key_link(struct ieee80211_key *key,
 		       struct ieee80211_sub_if_data *sdata,
 		       struct sta_info *sta)
@@ -634,8 +668,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
 	 * Silently accept key re-installation without really installing the
 	 * new version of the key to avoid nonce reuse or replay issues.
 	 */
-	if (old_key && key->conf.keylen == old_key->conf.keylen &&
-	    !memcmp(key->conf.key, old_key->conf.key, key->conf.keylen)) {
+	if (ieee80211_key_identical(sdata, old_key, key)) {
 		ieee80211_key_free_unused(key);
 		ret = 0;
 		goto out;
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 6ab39dbcca01..8557a1cae041 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -661,13 +661,15 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 			}
 		}
 
-		rds_ib_set_wr_signal_state(ic, send, 0);
+		rds_ib_set_wr_signal_state(ic, send, false);
 
 		/*
 		 * Always signal the last one if we're stopping due to flow control.
 		 */
-		if (ic->i_flowctl && flow_controlled && i == (work_alloc-1))
-			send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+		if (ic->i_flowctl && flow_controlled && i == (work_alloc - 1)) {
+			rds_ib_set_wr_signal_state(ic, send, true);
+			send->s_wr.send_flags |= IB_SEND_SOLICITED;
+		}
 
 		if (send->s_wr.send_flags & IB_SEND_SIGNALED)
 			nr_sig++;
@@ -705,11 +707,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 	if (scat == &rm->data.op_sg[rm->data.op_count]) {
 		prev->s_op = ic->i_data_op;
 		prev->s_wr.send_flags |= IB_SEND_SOLICITED;
-		if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) {
-			ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
-			prev->s_wr.send_flags |= IB_SEND_SIGNALED;
-			nr_sig++;
-		}
+		if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED))
+			nr_sig += rds_ib_set_wr_signal_state(ic, prev, true);
 		ic->i_data_op = NULL;
 	}
 
@@ -792,6 +791,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
 		send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
 		send->s_atomic_wr.swap_mask = 0;
 	}
+	send->s_wr.send_flags = 0;
 	nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
 	send->s_atomic_wr.wr.num_sge = 1;
 	send->s_atomic_wr.wr.next = NULL;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index ec986ae52808..a9f9a2ccc664 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -264,6 +264,7 @@ static int __init sample_init_module(void)
 
 static void __exit sample_cleanup_module(void)
 {
+	rcu_barrier();
 	tcf_unregister_action(&act_sample_ops, &sample_net_ops);
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0e96cdae9995..d9d54b367d23 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -77,6 +77,8 @@ out:
 }
 EXPORT_SYMBOL(register_tcf_proto_ops);
 
+static struct workqueue_struct *tc_filter_wq;
+
 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 {
 	struct tcf_proto_ops *t;
@@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 	 * tcf_proto_ops's destroy() handler.
 	 */
 	rcu_barrier();
+	flush_workqueue(tc_filter_wq);
 
 	write_lock(&cls_mod_lock);
 	list_for_each_entry(t, &tcf_proto_base, head) {
@@ -100,6 +103,12 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 }
 EXPORT_SYMBOL(unregister_tcf_proto_ops);
 
+bool tcf_queue_work(struct work_struct *work)
+{
+	return queue_work(tc_filter_wq, work);
+}
+EXPORT_SYMBOL(tcf_queue_work);
+
 /* Select new prio value from the range, managed by kernel. */
 
 static inline u32 tcf_auto_prio(struct tcf_proto *tp)
@@ -308,27 +317,30 @@ int tcf_block_get(struct tcf_block **p_block,
 }
 EXPORT_SYMBOL(tcf_block_get);
 
-void tcf_block_put_ext(struct tcf_block *block,
-		       struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
-		       struct tcf_block_ext_info *ei)
+static void tcf_block_put_final(struct work_struct *work)
 {
+	struct tcf_block *block = container_of(work, struct tcf_block, work);
 	struct tcf_chain *chain, *tmp;
 
-	if (!block)
-		return;
-
-	tcf_block_offload_unbind(block, q, ei);
-
-	/* XXX: Standalone actions are not allowed to jump to any chain, and
-	 * bound actions should be all removed after flushing. However,
-	 * filters are destroyed in RCU callbacks, we have to hold the chains
-	 * first, otherwise we would always race with RCU callbacks on this list
-	 * without proper locking.
-	 */
+	/* At this point, all the chains should have refcnt == 1. */
+	rtnl_lock();
+	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+		tcf_chain_put(chain);
+	rtnl_unlock();
+	kfree(block);
+}
 
-	/* Wait for existing RCU callbacks to cool down. */
-	rcu_barrier();
+/* XXX: Standalone actions are not allowed to jump to any chain, and bound
+ * actions should be all removed after flushing. However, filters are destroyed
+ * in RCU callbacks, we have to hold the chains first, otherwise we would
+ * always race with RCU callbacks on this list without proper locking.
+ */
+static void tcf_block_put_deferred(struct work_struct *work)
+{
+	struct tcf_block *block = container_of(work, struct tcf_block, work);
+	struct tcf_chain *chain;
 
+	rtnl_lock();
 	/* Hold a refcnt for all chains, except 0, in case they are gone. */
 	list_for_each_entry(chain, &block->chain_list, list)
 		if (chain->index)
@@ -338,13 +350,31 @@ void tcf_block_put_ext(struct tcf_block *block,
 	list_for_each_entry(chain, &block->chain_list, list)
 		tcf_chain_flush(chain);
 
-	/* Wait for RCU callbacks to release the reference count. */
+	INIT_WORK(&block->work, tcf_block_put_final);
+	/* Wait for RCU callbacks to release the reference count and make
+	 * sure their works have been queued before this.
+	 */
 	rcu_barrier();
+	tcf_queue_work(&block->work);
+	rtnl_unlock();
+}
 
-	/* At this point, all the chains should have refcnt == 1. */
-	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
-		tcf_chain_put(chain);
-	kfree(block);
+void tcf_block_put_ext(struct tcf_block *block,
+		       struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+		       struct tcf_block_ext_info *ei)
+{
+	if (!block)
+		return;
+
+	tcf_block_offload_unbind(block, q, ei);
+
+	INIT_WORK(&block->work, tcf_block_put_deferred);
+	/* Wait for existing RCU callbacks to cool down, make sure their works
+	 * have been queued before this. We can not flush pending works here
+	 * because we are holding the RTNL lock.
+	 */
+	rcu_barrier();
+	tcf_queue_work(&block->work);
 }
 EXPORT_SYMBOL(tcf_block_put_ext);
 
@@ -354,6 +384,7 @@ void tcf_block_put(struct tcf_block *block)
 
 	tcf_block_put_ext(block, NULL, block->q, &ei);
 }
+
 EXPORT_SYMBOL(tcf_block_put);
 
 struct tcf_block_cb {
@@ -1051,6 +1082,7 @@ void tcf_exts_destroy(struct tcf_exts *exts)
 #ifdef CONFIG_NET_CLS_ACT
 	LIST_HEAD(actions);
 
+	ASSERT_RTNL();
 	tcf_exts_to_list(exts, &actions);
 	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
 	kfree(exts->actions);
@@ -1229,6 +1261,10 @@ EXPORT_SYMBOL(tc_setup_cb_call);
 
 static int __init tc_filter_init(void)
 {
+	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
+	if (!tc_filter_wq)
+		return -ENOMEM;
+
 	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 700b345b07f9..871351358c10 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -35,7 +35,10 @@ struct basic_filter {
 	struct tcf_result	res;
 	struct tcf_proto	*tp;
 	struct list_head	link;
-	struct rcu_head		rcu;
+	union {
+		struct work_struct	work;
+		struct rcu_head		rcu;
+	};
 };
 
 static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -84,15 +87,26 @@ static int basic_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static void basic_delete_filter(struct rcu_head *head)
+static void basic_delete_filter_work(struct work_struct *work)
 {
-	struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+	struct basic_filter *f = container_of(work, struct basic_filter, work);
 
+	rtnl_lock();
 	tcf_exts_destroy(&f->exts);
 	tcf_em_tree_destroy(&f->ematches);
+	rtnl_unlock();
+
 	kfree(f);
 }
 
+static void basic_delete_filter(struct rcu_head *head)
+{
+	struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+
+	INIT_WORK(&f->work, basic_delete_filter_work);
+	tcf_queue_work(&f->work);
+}
+
 static void basic_destroy(struct tcf_proto *tp)
 {
 	struct basic_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 0f8b51061c39..5f701c8670a2 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -50,7 +50,10 @@ struct cls_bpf_prog {
 	struct sock_filter *bpf_ops;
 	const char *bpf_name;
 	struct tcf_proto *tp;
-	struct rcu_head rcu;
+	union {
+		struct work_struct work;
+		struct rcu_head rcu;
+	};
 };
 
 static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
@@ -269,9 +272,21 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
 	kfree(prog);
 }
 
+static void cls_bpf_delete_prog_work(struct work_struct *work)
+{
+	struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work);
+
+	rtnl_lock();
+	__cls_bpf_delete_prog(prog);
+	rtnl_unlock();
+}
+
 static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
 {
-	__cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu));
+	struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
+
+	INIT_WORK(&prog->work, cls_bpf_delete_prog_work);
+	tcf_queue_work(&prog->work);
 }
 
 static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index d48452f87975..a97e069bee89 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,7 +23,10 @@ struct cls_cgroup_head {
 	struct tcf_exts		exts;
 	struct tcf_ematch_tree	ematches;
 	struct tcf_proto	*tp;
-	struct rcu_head		rcu;
+	union {
+		struct work_struct	work;
+		struct rcu_head		rcu;
+	};
 };
 
 static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -57,15 +60,26 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
 	[TCA_CGROUP_EMATCHES]	= { .type = NLA_NESTED },
 };
 
+static void cls_cgroup_destroy_work(struct work_struct *work)
+{
+	struct cls_cgroup_head *head = container_of(work,
+						    struct cls_cgroup_head,
+						    work);
+	rtnl_lock();
+	tcf_exts_destroy(&head->exts);
+	tcf_em_tree_destroy(&head->ematches);
+	kfree(head);
+	rtnl_unlock();
+}
+
 static void cls_cgroup_destroy_rcu(struct rcu_head *root)
 {
 	struct cls_cgroup_head *head = container_of(root,
 						    struct cls_cgroup_head,
 						    rcu);
 
-	tcf_exts_destroy(&head->exts);
-	tcf_em_tree_destroy(&head->ematches);
-	kfree(head);
+	INIT_WORK(&head->work, cls_cgroup_destroy_work);
+	tcf_queue_work(&head->work);
 }
 
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6b29cef90bec..9c08fcdaf41c 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -57,7 +57,10 @@ struct flow_filter {
 	u32			divisor;
 	u32			baseclass;
 	u32			hashrnd;
-	struct rcu_head		rcu;
+	union {
+		struct work_struct	work;
+		struct rcu_head		rcu;
+	};
 };
 
 static inline u32 addr_fold(void *addr)
@@ -369,14 +372,24 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
 	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
 };
 
-static void flow_destroy_filter(struct rcu_head *head)
+static void flow_destroy_filter_work(struct work_struct *work)
 {
author	David S. Miller <davem@davemloft.net>	2017-10-30 14:10:01 +0900
committer	David S. Miller <davem@davemloft.net>	2017-10-30 21:09:24 +0900
commit	e1ea2f9856b765a2eaabb403a6751f70efc9ba4c (patch)
tree	771f0f96fdab1b27757730e96d911c73f5499ee4 /net
parent	aad93c70b9a3b80dbc383a31e77a119f69bdd856 (diff)
parent	0b07194bb55ed836c2cc7c22e866b87a14681984 (diff)