summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2017-10-29 08:11:49 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-10-29 08:11:49 -0700
commit: 19e12196da601f7b80b5639adbcecf60fae13389 (patch)
tree: fdf8c4acac3e7b126c13ef57dbb66f87ff53e441 /net
parent: 25a5d23b47994cdb451dcd2bc8ac310a1492f71b (diff)
parent: 6c325f4eca9ee9eb32cf58768e6e4ebcabaa8d6e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:

 1) Fix route leak in xfrm_bundle_create().
 2) In mac80211, validate user rate mask before configuring it. From Johannes Berg.
 3) Properly enforce memory limits in fair queueing code, from Toke Hoiland-Jorgensen.
 4) Fix lockdep splat in inet_csk_route_req(), from Eric Dumazet.
 5) Fix TSO header allocation and management in mvpp2 driver, from Yan Markman.
 6) Don't take socket lock in BH handler in strparser code, from Tom Herbert.
 7) Don't show sockets from other namespaces in AF_UNIX code, from Andrei Vagin.
 8) Fix double free in error path of tap_open(), from Girish Moodalbail.
 9) Fix TX map failure path in igb and ixgbe, from Jean-Philippe Brucker and Alexander Duyck.
10) Fix DCB mode programming in stmmac driver, from Jose Abreu.
11) Fix err_count handling in various tunnels (ipip, ip6_gre). From Xin Long.
12) Properly align SKB head before building SKB in tuntap, from Jason Wang.
13) Avoid matching qdiscs with a zero handle during lookups, from Cong Wang.
14) Fix various endianness bugs in sctp, from Xin Long.
15) Fix tc filter callback races and add selftests which trigger the problem, from Cong Wang.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (73 commits)
  selftests: Introduce a new test case to tc testsuite
  selftests: Introduce a new script to generate tc batch file
  net_sched: fix call_rcu() race on act_sample module removal
  net_sched: add rtnl assertion to tcf_exts_destroy()
  net_sched: use tcf_queue_work() in tcindex filter
  net_sched: use tcf_queue_work() in rsvp filter
  net_sched: use tcf_queue_work() in route filter
  net_sched: use tcf_queue_work() in u32 filter
  net_sched: use tcf_queue_work() in matchall filter
  net_sched: use tcf_queue_work() in fw filter
  net_sched: use tcf_queue_work() in flower filter
  net_sched: use tcf_queue_work() in flow filter
  net_sched: use tcf_queue_work() in cgroup filter
  net_sched: use tcf_queue_work() in bpf filter
  net_sched: use tcf_queue_work() in basic filter
  net_sched: introduce a workqueue for RCU callbacks of tc filter
  sctp: fix some type cast warnings introduced since very beginning
  sctp: fix a type cast warnings that causes a_rwnd gets the wrong value
  sctp: fix some type cast warnings introduced by transport rhashtable
  sctp: fix some type cast warnings introduced by stream reconf
  ...
Diffstat (limited to 'net')
-rw-r--r--  net/core/filter.c                | 32
-rw-r--r--  net/dccp/ipv4.c                  |  2
-rw-r--r--  net/dsa/dsa2.c                   |  7
-rw-r--r--  net/ipv4/inet_connection_sock.c  |  3
-rw-r--r--  net/ipv4/ipip.c                  | 59
-rw-r--r--  net/ipv4/tcp_ipv4.c              |  2
-rw-r--r--  net/ipv4/tcp_output.c            | 10
-rw-r--r--  net/ipv6/ip6_gre.c               | 20
-rw-r--r--  net/mac80211/cfg.c               | 12
-rw-r--r--  net/mac80211/key.c               | 37
-rw-r--r--  net/rds/ib_send.c                | 16
-rw-r--r--  net/sched/act_sample.c           |  1
-rw-r--r--  net/sched/cls_api.c              | 69
-rw-r--r--  net/sched/cls_basic.c            | 20
-rw-r--r--  net/sched/cls_bpf.c              | 19
-rw-r--r--  net/sched/cls_cgroup.c           | 22
-rw-r--r--  net/sched/cls_flow.c             | 19
-rw-r--r--  net/sched/cls_flower.c           | 19
-rw-r--r--  net/sched/cls_fw.c               | 19
-rw-r--r--  net/sched/cls_matchall.c         | 19
-rw-r--r--  net/sched/cls_route.c            | 19
-rw-r--r--  net/sched/cls_rsvp.h             | 19
-rw-r--r--  net/sched/cls_tcindex.c          | 38
-rw-r--r--  net/sched/cls_u32.c              | 29
-rw-r--r--  net/sched/sch_api.c              |  2
-rw-r--r--  net/sctp/input.c                 | 22
-rw-r--r--  net/sctp/ipv6.c                  |  8
-rw-r--r--  net/sctp/sm_make_chunk.c         |  9
-rw-r--r--  net/sctp/sm_sideeffect.c         |  8
-rw-r--r--  net/sctp/socket.c                | 32
-rw-r--r--  net/sctp/stream.c                | 26
-rw-r--r--  net/sctp/ulpevent.c              |  2
-rw-r--r--  net/strparser/strparser.c        | 17
-rw-r--r--  net/unix/diag.c                  |  2
-rw-r--r--  net/wireless/sme.c               | 50
-rw-r--r--  net/xfrm/xfrm_policy.c           | 16
-rw-r--r--  net/xfrm/xfrm_user.c             | 25
37 files changed, 560 insertions, 171 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index aa0265997f93..6ae94f825f72 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1844,14 +1844,15 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
{
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+ /* If user passes invalid input drop the packet. */
if (unlikely(flags))
- return SK_ABORTED;
+ return SK_DROP;
tcb->bpf.key = key;
tcb->bpf.flags = flags;
tcb->bpf.map = map;
- return SK_REDIRECT;
+ return SK_PASS;
}
struct sock *do_sk_redirect_map(struct sk_buff *skb)
@@ -4243,6 +4244,31 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+ int off;
+
+ switch (si->off) {
+ case offsetof(struct __sk_buff, data_end):
+ off = si->off;
+ off -= offsetof(struct __sk_buff, data_end);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct tcp_skb_cb, bpf.data_end);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+ si->src_reg, off);
+ break;
+ default:
+ return bpf_convert_ctx_access(type, si, insn_buf, prog,
+ target_size);
+ }
+
+ return insn - insn_buf;
+}
+
const struct bpf_verifier_ops sk_filter_prog_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
@@ -4301,7 +4327,7 @@ const struct bpf_verifier_ops sock_ops_prog_ops = {
const struct bpf_verifier_ops sk_skb_prog_ops = {
.get_func_proto = sk_skb_func_proto,
.is_valid_access = sk_skb_is_valid_access,
- .convert_ctx_access = bpf_convert_ctx_access,
+ .convert_ctx_access = sk_skb_convert_ctx_access,
.gen_prologue = sk_skb_prologue,
};
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 0490916864f9..e65fcb45c3f6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -495,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
ireq->ir_rmt_addr);
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- rcu_dereference(ireq->ireq_opt));
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 873af0108e24..045d8a176279 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -496,14 +496,15 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
if (!ethernet)
return -EINVAL;
ethernet_dev = of_find_net_device_by_node(ethernet);
+ if (!ethernet_dev)
+ return -EPROBE_DEFER;
} else {
ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]);
+ if (!ethernet_dev)
+ return -EPROBE_DEFER;
dev_put(ethernet_dev);
}
- if (!ethernet_dev)
- return -EPROBE_DEFER;
-
if (!dst->cpu_dp) {
dst->cpu_dp = port;
dst->cpu_dp->netdev = ethernet_dev;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5ec9136a7c36..b47a59cb3573 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -543,7 +543,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
struct ip_options_rcu *opt;
struct rtable *rt;
- opt = rcu_dereference(ireq->ireq_opt);
+ opt = ireq_opt_deref(ireq);
+
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index fb1ad22b5e29..cdd627355ed1 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly;
static int ipip_err(struct sk_buff *skb, u32 info)
{
-
-/* All the routers (except for Linux) return only
- 8 bytes of packet payload. It means, that precise relaying of
- ICMP in the real Internet is absolutely infeasible.
- */
+ /* All the routers (except for Linux) return only
+ * 8 bytes of packet payload. It means, that precise relaying of
+ * ICMP in the real Internet is absolutely infeasible.
+ */
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
const struct iphdr *iph = (const struct iphdr *)skb->data;
- struct ip_tunnel *t;
- int err;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
+ struct ip_tunnel *t;
+ int err = 0;
+
+ switch (type) {
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ /* Impossible event. */
+ goto out;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ * rfc2003 contains "deep thoughts" about NET_UNREACH,
+ * I believe they are just ether pollution. --ANK
+ */
+ break;
+ }
+ break;
+
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ goto out;
+ break;
+
+ case ICMP_REDIRECT:
+ break;
+
+ default:
+ goto out;
+ }
- err = -ENOENT;
t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->daddr, iph->saddr, 0);
- if (!t)
+ if (!t) {
+ err = -ENOENT;
goto out;
+ }
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, iph->protocol, 0);
- err = 0;
+ ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
+ iph->protocol, 0);
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- iph->protocol, 0);
- err = 0;
+ ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
goto out;
}
- if (t->parms.iph.daddr == 0)
+ if (t->parms.iph.daddr == 0) {
+ err = -ENOENT;
goto out;
+ }
- err = 0;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
goto out;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4c43365c374c..5b027c69cbc5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -877,7 +877,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- rcu_dereference(ireq->ireq_opt));
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0bc9e46a5369..ae60dd3faed0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -739,8 +739,10 @@ static void tcp_tsq_handler(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tp->lost_out > tp->retrans_out &&
- tp->snd_cwnd > tcp_packets_in_flight(tp))
+ tp->snd_cwnd > tcp_packets_in_flight(tp)) {
+ tcp_mstamp_refresh(tp);
tcp_xmit_retransmit_queue(sk);
+ }
tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
0, GFP_ATOMIC);
@@ -2237,6 +2239,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
sent_pkts = 0;
+ tcp_mstamp_refresh(tp);
if (!push_one) {
/* Do MTU probing. */
result = tcp_mtu_probe(sk);
@@ -2248,7 +2251,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
}
max_segs = tcp_tso_segs(sk, mss_now);
- tcp_mstamp_refresh(tp);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
@@ -2841,8 +2843,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-ENOBUFS;
- if (!err)
+ if (!err) {
skb->skb_mstamp = tp->tcp_mstamp;
+ tcp_rate_skb_sent(sk, skb);
+ }
} else {
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 1602b491b281..59c121b932ac 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -408,13 +408,16 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
- break;
+ if (code != ICMPV6_PORT_UNREACH)
+ break;
+ return;
case ICMPV6_TIME_EXCEED:
if (code == ICMPV6_EXC_HOPLIMIT) {
net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
t->parms.name);
+ break;
}
- break;
+ return;
case ICMPV6_PARAMPROB:
teli = 0;
if (code == ICMPV6_HDR_FIELD)
@@ -430,7 +433,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
t->parms.name);
}
- break;
+ return;
case ICMPV6_PKT_TOOBIG:
mtu = be32_to_cpu(info) - offset - t->tun_hlen;
if (t->dev->type == ARPHRD_ETHER)
@@ -438,7 +441,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
t->dev->mtu = mtu;
- break;
+ return;
}
if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
@@ -500,8 +503,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
__u32 *pmtu, __be16 proto)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
- __be16 protocol = (dev->type == ARPHRD_ETHER) ?
- htons(ETH_P_TEB) : proto;
+ struct dst_entry *dst = skb_dst(skb);
+ __be16 protocol;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -515,9 +518,14 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
tunnel->o_seqno++;
/* Push GRE header. */
+ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (dst && dst_mtu(dst) > dst->dev->mtu)
+ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE);
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a354f1939e49..fb15d3b97cb2 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2727,12 +2727,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
if (!ieee80211_sdata_running(sdata))
return -ENETDOWN;
- if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
- ret = drv_set_bitrate_mask(local, sdata, mask);
- if (ret)
- return ret;
- }
-
/*
* If active validate the setting and reject it if it doesn't leave
* at least one basic rate usable, since we really have to be able
@@ -2748,6 +2742,12 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
return -EINVAL;
}
+ if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
+ ret = drv_set_bitrate_mask(local, sdata, mask);
+ if (ret)
+ return ret;
+ }
+
for (i = 0; i < NUM_NL80211_BANDS; i++) {
struct ieee80211_supported_band *sband = wiphy->bands[i];
int j;
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index ae995c8480db..938049395f90 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -19,6 +19,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <net/mac80211.h>
+#include <crypto/algapi.h>
#include <asm/unaligned.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -609,6 +610,39 @@ void ieee80211_key_free_unused(struct ieee80211_key *key)
ieee80211_key_free_common(key);
}
+static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_key *old,
+ struct ieee80211_key *new)
+{
+ u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP];
+ u8 *tk_old, *tk_new;
+
+ if (!old || new->conf.keylen != old->conf.keylen)
+ return false;
+
+ tk_old = old->conf.key;
+ tk_new = new->conf.key;
+
+ /*
+ * In station mode, don't compare the TX MIC key, as it's never used
+ * and offloaded rekeying may not care to send it to the host. This
+ * is the case in iwlwifi, for example.
+ */
+ if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+ new->conf.cipher == WLAN_CIPHER_SUITE_TKIP &&
+ new->conf.keylen == WLAN_KEY_LEN_TKIP &&
+ !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
+ memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP);
+ memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP);
+ memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+ memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+ tk_old = tkip_old;
+ tk_new = tkip_new;
+ }
+
+ return !crypto_memneq(tk_old, tk_new, new->conf.keylen);
+}
+
int ieee80211_key_link(struct ieee80211_key *key,
struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
@@ -634,8 +668,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
* Silently accept key re-installation without really installing the
* new version of the key to avoid nonce reuse or replay issues.
*/
- if (old_key && key->conf.keylen == old_key->conf.keylen &&
- !memcmp(key->conf.key, old_key->conf.key, key->conf.keylen)) {
+ if (ieee80211_key_identical(sdata, old_key, key)) {
ieee80211_key_free_unused(key);
ret = 0;
goto out;
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 6ab39dbcca01..8557a1cae041 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -661,13 +661,15 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
}
}
- rds_ib_set_wr_signal_state(ic, send, 0);
+ rds_ib_set_wr_signal_state(ic, send, false);
/*
* Always signal the last one if we're stopping due to flow control.
*/
- if (ic->i_flowctl && flow_controlled && i == (work_alloc-1))
- send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ if (ic->i_flowctl && flow_controlled && i == (work_alloc - 1)) {
+ rds_ib_set_wr_signal_state(ic, send, true);
+ send->s_wr.send_flags |= IB_SEND_SOLICITED;
+ }
if (send->s_wr.send_flags & IB_SEND_SIGNALED)
nr_sig++;
@@ -705,11 +707,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
if (scat == &rm->data.op_sg[rm->data.op_count]) {
prev->s_op = ic->i_data_op;
prev->s_wr.send_flags |= IB_SEND_SOLICITED;
- if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) {
- ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
- prev->s_wr.send_flags |= IB_SEND_SIGNALED;
- nr_sig++;
- }
+ if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED))
+ nr_sig += rds_ib_set_wr_signal_state(ic, prev, true);
ic->i_data_op = NULL;
}
@@ -792,6 +791,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
send->s_atomic_wr.swap_mask = 0;
}
+ send->s_wr.send_flags = 0;
nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
send->s_atomic_wr.wr.num_sge = 1;
send->s_atomic_wr.wr.next = NULL;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index ec986ae52808..a9f9a2ccc664 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -264,6 +264,7 @@ static int __init sample_init_module(void)
static void __exit sample_cleanup_module(void)
{
+ rcu_barrier();
tcf_unregister_action(&act_sample_ops, &sample_net_ops);
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0b2219adf520..231181c602ed 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -77,6 +77,8 @@ out:
}
EXPORT_SYMBOL(register_tcf_proto_ops);
+static struct workqueue_struct *tc_filter_wq;
+
int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
struct tcf_proto_ops *t;
@@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
* tcf_proto_ops's destroy() handler.
*/
rcu_barrier();
+ flush_workqueue(tc_filter_wq);
write_lock(&cls_mod_lock);
list_for_each_entry(t, &tcf_proto_base, head) {
@@ -100,6 +103,12 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
+bool tcf_queue_work(struct work_struct *work)
+{
+ return queue_work(tc_filter_wq, work);
+}
+EXPORT_SYMBOL(tcf_queue_work);
+
/* Select new prio value from the range, managed by kernel. */
static inline u32 tcf_auto_prio(struct tcf_proto *tp)
@@ -266,23 +275,30 @@ err_chain_create:
}
EXPORT_SYMBOL(tcf_block_get);
-void tcf_block_put(struct tcf_block *block)
+static void tcf_block_put_final(struct work_struct *work)
{
+ struct tcf_block *block = container_of(work, struct tcf_block, work);
struct tcf_chain *chain, *tmp;
- if (!block)
- return;
-
- /* XXX: Standalone actions are not allowed to jump to any chain, and
- * bound actions should be all removed after flushing. However,
- * filters are destroyed in RCU callbacks, we have to hold the chains
- * first, otherwise we would always race with RCU callbacks on this list
- * without proper locking.
- */
+ /* At this point, all the chains should have refcnt == 1. */
+ rtnl_lock();
+ list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+ tcf_chain_put(chain);
+ rtnl_unlock();
+ kfree(block);
+}
- /* Wait for existing RCU callbacks to cool down. */
- rcu_barrier();
+/* XXX: Standalone actions are not allowed to jump to any chain, and bound
+ * actions should be all removed after flushing. However, filters are destroyed
+ * in RCU callbacks, we have to hold the chains first, otherwise we would
+ * always race with RCU callbacks on this list without proper locking.
+ */
+static void tcf_block_put_deferred(struct work_struct *work)
+{
+ struct tcf_block *block = container_of(work, struct tcf_block, work);
+ struct tcf_chain *chain;
+ rtnl_lock();
/* Hold a refcnt for all chains, except 0, in case they are gone. */
list_for_each_entry(chain, &block->chain_list, list)
if (chain->index)
@@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block)
list_for_each_entry(chain, &block->chain_list, list)
tcf_chain_flush(chain);
- /* Wait for RCU callbacks to release the reference count. */
+ INIT_WORK(&block->work, tcf_block_put_final);
+ /* Wait for RCU callbacks to release the reference count and make
+ * sure their works have been queued before this.
+ */
rcu_barrier();
+ tcf_queue_work(&block->work);
+ rtnl_unlock();
+}
- /* At this point, all the chains should have refcnt == 1. */
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
- tcf_chain_put(chain);
- kfree(block);
+void tcf_block_put(struct tcf_block *block)
+{
+ if (!block)
+ return;
+
+ INIT_WORK(&block->work, tcf_block_put_deferred);
+ /* Wait for existing RCU callbacks to cool down, make sure their works
+ * have been queued before this. We can not flush pending works here
+ * because we are holding the RTNL lock.
+ */
+ rcu_barrier();
+ tcf_queue_work(&block->work);
}
EXPORT_SYMBOL(tcf_block_put);
@@ -879,6 +909,7 @@ void tcf_exts_destroy(struct tcf_exts *exts)
#ifdef CONFIG_NET_CLS_ACT
LIST_HEAD(actions);
+ ASSERT_RTNL();
tcf_exts_to_list(exts, &actions);
tcf_action_destroy(&actions, TCA_ACT_UNBIND);
kfree(exts->actions);
@@ -1030,6 +1061,10 @@ EXPORT_SYMBOL(tcf_exts_get_dev);
static int __init tc_filter_init(void)
{
+ tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
+ if (!tc_filter_wq)
+ return -ENOMEM;
+
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index d89ebafd2239..f177649a2419 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -34,7 +34,10 @@ struct basic_filter {
struct tcf_result res;
struct tcf_proto *tp;
struct list_head link;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -82,15 +85,26 @@ static int basic_init(struct tcf_proto *tp)
return 0;
}
-static void basic_delete_filter(struct rcu_head *head)
+static void basic_delete_filter_work(struct work_struct *work)
{
- struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+ struct basic_filter *f = container_of(work, struct basic_filter, work);
+ rtnl_lock();
tcf_exts_destroy(&f->exts);
tcf_em_tree_destroy(&f->ematches);
+ rtnl_unlock();
+
kfree(f);
}
+static void basic_delete_filter(struct rcu_head *head)
+{
+ struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+
+ INIT_WORK(&f->work, basic_delete_filter_work);
+ tcf_queue_work(&f->work);
+}
+
static void basic_destroy(struct tcf_proto *tp)
{
struct basic_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 520c5027646a..037a3ae86829 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -49,7 +49,10 @@ struct cls_bpf_prog {
struct sock_filter *bpf_ops;
const char *bpf_name;
struct tcf_proto *tp;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
@@ -257,9 +260,21 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
kfree(prog);
}
+static void cls_bpf_delete_prog_work(struct work_struct *work)
+{
+ struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work);
+
+ rtnl_lock();
+ __cls_bpf_delete_prog(prog);
+ rtnl_unlock();
+}
+
static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
{
- __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu));
+ struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
+
+ INIT_WORK(&prog->work, cls_bpf_delete_prog_work);
+ tcf_queue_work(&prog->work);
}
static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index d48452f87975..a97e069bee89 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,7 +23,10 @@ struct cls_cgroup_head {
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
struct tcf_proto *tp;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -57,15 +60,26 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
[TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
};
+static void cls_cgroup_destroy_work(struct work_struct *work)
+{
+ struct cls_cgroup_head *head = container_of(work,
+ struct cls_cgroup_head,
+ work);
+ rtnl_lock();
+ tcf_exts_destroy(&head->exts);
+ tcf_em_tree_destroy(&head->ematches);
+ kfree(head);
+ rtnl_unlock();
+}
+
static void cls_cgroup_destroy_rcu(struct rcu_head *root)
{
struct cls_cgroup_head *head = container_of(root,
struct cls_cgroup_head,
rcu);
- tcf_exts_destroy(&head->exts);
- tcf_em_tree_destroy(&head->ematches);
- kfree(head);
+ INIT_WORK(&head->work, cls_cgroup_destroy_work);
+ tcf_queue_work(&head->work);
}
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2a3a60ec5b86..67f3a2af6aab 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -57,7 +57,10 @@ struct flow_filter {
u32 divisor;
u32 baseclass;
u32 hash