summaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-09-03 17:08:42 -0700
committerDavid S. Miller <davem@davemloft.net>2017-09-03 17:08:42 -0700
commitb63f6044d8e43e4a1eef8b0a2310cec872fd1d38 (patch)
treed7d91953b9037b2f8eaf2e160aec6166bb38ba8b /net/netfilter
parent942e7e5fc1ea4625d971e62f00b5ae3d2f8855de (diff)
parent1aff64715edb8565e99337b842d814d636641b50 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for your net-next tree. Basically, updates to the conntrack core, enhancements for nf_tables, conversion of netfilter hooks from linked list to array to improve memory locality and asorted improvements for the Netfilter codebase. More specifically, they are: 1) Add expection to hashes after timer initialization to prevent access from another CPU that walks on the hashes and calls del_timer(), from Florian Westphal. 2) Don't update nf_tables chain counters from hot path, this is only used by the x_tables compatibility layer. 3) Get rid of nested rcu_read_lock() calls from netfilter hook path. Hooks are always guaranteed to run from rcu read side, so remove nested rcu_read_lock() where possible. Patch from Taehee Yoo. 4) nf_tables new ruleset generation notifications include PID and name of the process that has updated the ruleset, from Phil Sutter. 5) Use skb_header_pointer() from nft_fib, so we can reuse this code from the nf_family netdev family. Patch from Pablo M. Bermudo. 6) Add support for nft_fib in nf_tables netdev family, also from Pablo. 7) Use deferrable workqueue for conntrack garbage collection, to reduce power consumption, from Patch from Subash Abhinov Kasiviswanathan. 8) Add nf_ct_expect_iterate_net() helper and use it. From Florian Westphal. 9) Call nf_ct_unconfirmed_destroy only from cttimeout, from Florian. 10) Drop references on conntrack removal path when skbuffs has escaped via nfqueue, from Florian. 11) Don't queue packets to nfqueue with dying conntrack, from Florian. 12) Constify nf_hook_ops structure, from Florian. 13) Remove neededlessly branch in nf_tables trace code, from Phil Sutter. 14) Add nla_strdup(), from Phil Sutter. 15) Rise nf_tables objects name size up to 255 chars, people want to use DNS names, so increase this according to what RFC 1035 specifies. Patch series from Phil Sutter. 16) Kill nf_conntrack_default_on, it's broken. Default on conntrack hook registration on demand, suggested by Eric Dumazet, patch from Florian. 17) Remove unused variables in compat_copy_entry_from_user both in ip_tables and arp_tables code. Patch from Taehee Yoo. 18) Constify struct nf_conntrack_l4proto, from Julia Lawall. 19) Constify nf_loginfo structure, also from Julia. 20) Use a single rb root in connlimit, from Taehee Yoo. 21) Remove unused netfilter_queue_init() prototype, from Taehee Yoo. 22) Use audit_log() instead of open-coding it, from Geliang Tang. 23) Allow to mangle tcp options via nft_exthdr, from Florian. 24) Allow to fetch TCP MSS from nft_rt, from Florian. This includes a fix for a miscalculation of the minimal length. 25) Simplify branch logic in h323 helper, from Nick Desaulniers. 26) Calculate netlink attribute size for conntrack tuple at compile time, from Florian. 27) Remove protocol name field from nf_conntrack_{l3,l4}proto structure. From Florian. 28) Remove holes in nf_conntrack_l4proto structure, so it becomes smaller. From Florian. 29) Get rid of print_tuple() indirection for /proc conntrack listing. Place all the code in net/netfilter/nf_conntrack_standalone.c. Patch from Florian. 30) Do not built in print_conntrack() if CONFIG_NF_CONNTRACK_PROCFS is off. From Florian. 31) Constify most nf_conntrack_{l3,l4}proto helper functions, from Florian. 32) Fix broken indentation in ebtables extensions, from Colin Ian King. 33) Fix several harmless sparse warning, from Florian. 34) Convert netfilter hook infrastructure to use array for better memory locality, joint work done by Florian and Aaron Conole. Moreover, add some instrumentation to debug this. 35) Batch nf_unregister_net_hooks() calls, to call synchronize_net once per batch, from Florian. 36) Get rid of noisy logging in ICMPv6 conntrack helper, from Florian. 37) Get rid of obsolete NFDEBUG() instrumentation, from Varsha Rao. 38) Remove unused code in the generic protocol tracker, from Davide Caratti. I think I will have material for a second Netfilter batch in my queue if time allow to make it fit in this merge window. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Kconfig9
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/core.c351
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c11
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c46
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c44
-rw-r--r--net/netfilter/nf_conntrack_expect.c66
-rw-r--r--net/netfilter/nf_conntrack_helper.c34
-rw-r--r--net/netfilter/nf_conntrack_l3proto_generic.c7
-rw-r--r--net/netfilter/nf_conntrack_netlink.c106
-rw-r--r--net/netfilter/nf_conntrack_pptp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto.c90
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c18
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c20
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c15
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c19
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c19
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c17
-rw-r--r--net/netfilter/nf_conntrack_sip.c6
-rw-r--r--net/netfilter/nf_conntrack_standalone.c97
-rw-r--r--net/netfilter/nf_internals.h10
-rw-r--r--net/netfilter/nf_queue.c68
-rw-r--r--net/netfilter/nf_sockopt.c2
-rw-r--r--net/netfilter/nf_tables_api.c128
-rw-r--r--net/netfilter/nf_tables_core.c28
-rw-r--r--net/netfilter/nf_tables_trace.c42
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c22
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c21
-rw-r--r--net/netfilter/nft_exthdr.c213
-rw-r--r--net/netfilter/nft_fib_netdev.c87
-rw-r--r--net/netfilter/nft_payload.c2
-rw-r--r--net/netfilter/nft_rt.c73
-rw-r--r--net/netfilter/nft_set_rbtree.c49
-rw-r--r--net/netfilter/x_tables.c14
-rw-r--r--net/netfilter/xt_CT.c2
-rw-r--r--net/netfilter/xt_TCPMSS.c2
-rw-r--r--net/netfilter/xt_TPROXY.c4
-rw-r--r--net/netfilter/xt_addrtype.c3
-rw-r--r--net/netfilter/xt_connlimit.c26
-rw-r--r--net/netfilter/xt_hashlimit.c8
-rw-r--r--net/netfilter/xt_osf.c2
48 files changed, 1206 insertions, 617 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9b28864cc36a..e4a13cc8a2e7 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -636,6 +636,15 @@ config NFT_FWD_NETDEV
help
This option enables packet forwarding for the "netdev" family.
+config NFT_FIB_NETDEV
+ depends on NFT_FIB_IPV4
+ depends on NFT_FIB_IPV6
+ tristate "Netfilter nf_tables netdev fib lookups support"
+ help
+ This option allows using the FIB expression from the netdev table.
+ The lookup will be delegated to the IPv4 or IPv6 FIB depending
+ on the protocol of the packet.
+
endif # NF_TABLES_NETDEV
endif # NF_TABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 913380919301..d3891c93edd6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_NFT_REDIR) += nft_redir.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
obj-$(CONFIG_NFT_FIB) += nft_fib.o
obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
+obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 974cf2a3795a..04fe25abc5f6 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -21,7 +21,7 @@
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
-#include <linux/slab.h>
+#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -62,10 +62,182 @@ EXPORT_SYMBOL(nf_hooks_needed);
#endif
static DEFINE_MUTEX(nf_hook_mutex);
+
+/* max hooks per family/hooknum */
+#define MAX_HOOK_COUNT 1024
+
#define nf_entry_dereference(e) \
rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
-static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
+{
+ struct nf_hook_entries *e;
+ size_t alloc = sizeof(*e) +
+ sizeof(struct nf_hook_entry) * num +
+ sizeof(struct nf_hook_ops *) * num;
+
+ if (num == 0)
+ return NULL;
+
+ e = kvzalloc(alloc, GFP_KERNEL);
+ if (e)
+ e->num_hook_entries = num;
+ return e;
+}
+
+static unsigned int accept_all(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
+}
+
+static const struct nf_hook_ops dummy_ops = {
+ .hook = accept_all,
+ .priority = INT_MIN,
+};
+
+static struct nf_hook_entries *
+nf_hook_entries_grow(const struct nf_hook_entries *old,
+ const struct nf_hook_ops *reg)
+{
+ unsigned int i, alloc_entries, nhooks, old_entries;
+ struct nf_hook_ops **orig_ops = NULL;
+ struct nf_hook_ops **new_ops;
+ struct nf_hook_entries *new;
+ bool inserted = false;
+
+ alloc_entries = 1;
+ old_entries = old ? old->num_hook_entries : 0;
+
+ if (old) {
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+
+ for (i = 0; i < old_entries; i++) {
+ if (orig_ops[i] != &dummy_ops)
+ alloc_entries++;
+ }
+ }
+
+ if (alloc_entries > MAX_HOOK_COUNT)
+ return ERR_PTR(-E2BIG);
+
+ new = allocate_hook_entries_size(alloc_entries);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+
+ i = 0;
+ nhooks = 0;
+ while (i < old_entries) {
+ if (orig_ops[i] == &dummy_ops) {
+ ++i;
+ continue;
+ }
+ if (inserted || reg->priority > orig_ops[i]->priority) {
+ new_ops[nhooks] = (void *)orig_ops[i];
+ new->hooks[nhooks] = old->hooks[i];
+ i++;
+ } else {
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ inserted = true;
+ }
+ nhooks++;
+ }
+
+ if (!inserted) {
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ }
+
+ return new;
+}
+
+static void hooks_validate(const struct nf_hook_entries *hooks)
+{
+#ifdef CONFIG_DEBUG_KERNEL
+ struct nf_hook_ops **orig_ops;
+ int prio = INT_MIN;
+ size_t i = 0;
+
+ orig_ops = nf_hook_entries_get_hook_ops(hooks);
+
+ for (i = 0; i < hooks->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+
+ WARN_ON(orig_ops[i]->priority < prio);
+
+ if (orig_ops[i]->priority > prio)
+ prio = orig_ops[i]->priority;
+ }
+#endif
+}
+
+/*
+ * __nf_hook_entries_try_shrink - try to shrink hook array
+ *
+ * @pp -- location of hook blob
+ *
+ * Hook unregistration must always succeed, so to-be-removed hooks
+ * are replaced by a dummy one that will just move to next hook.
+ *
+ * This counts the current dummy hooks, attempts to allocate new blob,
+ * copies the live hooks, then replaces and discards old one.
+ *
+ * return values:
+ *
+ * Returns address to free, or NULL.
+ */
+static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
+{
+ struct nf_hook_entries *old, *new = NULL;
+ unsigned int i, j, skip = 0, hook_entries;
+ struct nf_hook_ops **orig_ops;
+ struct nf_hook_ops **new_ops;
+
+ old = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!old))
+ return NULL;
+
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ skip++;
+ }
+
+ /* if skip == hook_entries all hooks have been removed */
+ hook_entries = old->num_hook_entries;
+ if (skip == hook_entries)
+ goto out_assign;
+
+ if (WARN_ON(skip == 0))
+ return NULL;
+
+ hook_entries -= skip;
+ new = allocate_hook_entries_size(hook_entries);
+ if (!new)
+ return NULL;
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+ for (i = 0, j = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+ new->hooks[j] = old->hooks[i];
+ new_ops[j] = (void *)orig_ops[i];
+ j++;
+ }
+ hooks_validate(new);
+out_assign:
+ rcu_assign_pointer(*pp, new);
+ return old;
+}
+
+static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
{
if (reg->pf != NFPROTO_NETDEV)
return net->nf.hooks[reg->pf]+reg->hooknum;
@@ -76,13 +248,14 @@ static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const st
return &reg->dev->nf_hooks_ingress;
}
#endif
+ WARN_ON_ONCE(1);
return NULL;
}
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry __rcu **pp;
- struct nf_hook_entry *entry, *p;
+ struct nf_hook_entries *p, *new_hooks;
+ struct nf_hook_entries __rcu **pp;
if (reg->pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
@@ -98,23 +271,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
if (!pp)
return -EINVAL;
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return -ENOMEM;
-
- nf_hook_entry_init(entry, reg);
-
mutex_lock(&nf_hook_mutex);
- /* Find the spot in the list */
- for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
- if (reg->priority < nf_hook_entry_priority(p))
- break;
- }
- rcu_assign_pointer(entry->next, p);
- rcu_assign_pointer(*pp, entry);
+ p = nf_entry_dereference(*pp);
+ new_hooks = nf_hook_entries_grow(p, reg);
+
+ if (!IS_ERR(new_hooks))
+ rcu_assign_pointer(*pp, new_hooks);
mutex_unlock(&nf_hook_mutex);
+ if (IS_ERR(new_hooks))
+ return PTR_ERR(new_hooks);
+
+ hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
@@ -122,48 +291,74 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
#ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
+ synchronize_net();
+ BUG_ON(p == new_hooks);
+ kvfree(p);
return 0;
}
EXPORT_SYMBOL(nf_register_net_hook);
-static struct nf_hook_entry *
-__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+/*
+ * __nf_unregister_net_hook - remove a hook from blob
+ *
+ * @oldp: current address of hook blob
+ * @unreg: hook to unregister
+ *
+ * This cannot fail, hook unregistration must always succeed.
+ * Therefore replace the to-be-removed hook with a dummy hook.
+ */
+static void __nf_unregister_net_hook(struct nf_hook_entries *old,
+ const struct nf_hook_ops *unreg)
{
- struct nf_hook_entry __rcu **pp;
- struct nf_hook_entry *p;
-
- pp = nf_hook_entry_head(net, reg);
- if (WARN_ON_ONCE(!pp))
- return NULL;
+ struct nf_hook_ops **orig_ops;
+ bool found = false;
+ unsigned int i;
- mutex_lock(&nf_hook_mutex);
- for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
- if (nf_hook_entry_ops(p) == reg) {
- rcu_assign_pointer(*pp, p->next);
- break;
- }
- }
- mutex_unlock(&nf_hook_mutex);
- if (!p) {
- WARN(1, "nf_unregister_net_hook: hook not found!\n");
- return NULL;
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] != unreg)
+ continue;
+ WRITE_ONCE(old->hooks[i].hook, accept_all);
+ WRITE_ONCE(orig_ops[i], &dummy_ops);
+ found = true;
+ break;
}
+
+ if (found) {
#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
- net_dec_ingress_queue();
+ if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
+ net_dec_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
- static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
+ static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
#endif
-
- return p;
+ } else {
+ WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
+ }
}
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg);
+ struct nf_hook_entries __rcu **pp;
+ struct nf_hook_entries *p;
unsigned int nfq;
+ pp = nf_hook_entry_head(net, reg);
+ if (!pp)
+ return;
+
+ mutex_lock(&nf_hook_mutex);
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p)) {
+ mutex_unlock(&nf_hook_mutex);
+ return;
+ }
+
+ __nf_unregister_net_hook(p, reg);
+
+ p = __nf_hook_entries_try_shrink(pp);
+ mutex_unlock(&nf_hook_mutex);
if (!p)
return;
@@ -173,7 +368,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
nfq = nf_queue_nf_hook_drop(net);
if (nfq)
synchronize_net();
- kfree(p);
+ kvfree(p);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
@@ -200,26 +395,59 @@ EXPORT_SYMBOL(nf_register_net_hooks);
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int hookcount)
{
- struct nf_hook_entry *to_free[16];
- unsigned int i, n, nfq;
+ struct nf_hook_entries *to_free[16], *p;
+ struct nf_hook_entries __rcu **pp;
+ unsigned int i, j, n;
+
+ mutex_lock(&nf_hook_mutex);
+ for (i = 0; i < hookcount; i++) {
+ pp = nf_hook_entry_head(net, &reg[i]);
+ if (!pp)
+ continue;
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p))
+ continue;
+ __nf_unregister_net_hook(p, &reg[i]);
+ }
+ mutex_unlock(&nf_hook_mutex);
do {
n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
- for (i = 0; i < n; i++)
- to_free[i] = __nf_unregister_net_hook(net, &reg[i]);
+ mutex_lock(&nf_hook_mutex);
- synchronize_net();
+ for (i = 0, j = 0; i < hookcount && j < n; i++) {
+ pp = nf_hook_entry_head(net, &reg[i]);
+ if (!pp)
+ continue;
+
+ p = nf_entry_dereference(*pp);
+ if (!p)
+ continue;
+
+ to_free[j] = __nf_hook_entries_try_shrink(pp);
+ if (to_free[j])
+ ++j;
+ }
+
+ mutex_unlock(&nf_hook_mutex);
+
+ if (j) {
+ unsigned int nfq;
- /* need 2nd synchronize_net() if nfqueue is used, skb
- * can get reinjected right before nf_queue_hook_drop()
- */
- nfq = nf_queue_nf_hook_drop(net);
- if (nfq)
synchronize_net();
- for (i = 0; i < n; i++)
- kfree(to_free[i]);
+ /* need 2nd synchronize_net() if nfqueue is used, skb
+ * can get reinjected right before nf_queue_hook_drop()
+ */
+ nfq = nf_queue_nf_hook_drop(net);
+ if (nfq)
+ synchronize_net();
+
+ for (i = 0; i < j; i++)
+ kvfree(to_free[i]);
+ }
reg += n;
hookcount -= n;
@@ -230,16 +458,15 @@ EXPORT_SYMBOL(nf_unregister_net_hooks);
/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry *entry)
+ const struct nf_hook_entries *e, unsigned int s)
{
unsigned int verdict;
int ret;
- do {
- verdict = nf_hook_entry_hookfn(entry, skb, state);
+ for (; s < e->num_hook_entries; s++) {
+ verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
- entry = rcu_dereference(entry->next);
break;
case NF_DROP:
kfree_skb(skb);
@@ -248,8 +475,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
ret = -EPERM;
return ret;
case NF_QUEUE:
- ret = nf_queue(skb, state, &entry, verdict);
- if (ret == 1 && entry)
+ ret = nf_queue(skb, state, e, s, verdict);
+ if (ret == 1)
continue;
return ret;
default:
@@ -258,7 +485,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
*/
return 0;
}
- } while (entry);
+ }
return 1;
}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e31956b58aba..5cb7cac9177d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -125,14 +125,12 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.inpkts++;
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
@@ -159,14 +157,12 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.outpkts++;
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
@@ -1222,7 +1218,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
if (!pptr)
return NULL;
- rcu_read_lock();
dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
&iph->saddr, pptr[0]);
if (dest) {
@@ -1237,7 +1232,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
pptr[0], pptr[1]);
}
}
- rcu_read_unlock();
return cp;
}
@@ -1689,11 +1683,9 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
if (dest) {
struct ip_vs_dest_dst *dest_dst;
- rcu_read_lock();
dest_dst = rcu_dereference(dest->dest_dst);
if (dest_dst)
mtu = dst_mtu(dest_dst->dst_cache);
- rcu_read_unlock();
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
@@ -2109,7 +2101,7 @@ ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
#endif
-static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+static const struct nf_hook_ops ip_vs_ops[] = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 1fa3c2307b6e..4f940d7eb2f7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -550,18 +550,15 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
/* Check for "full" addressed entries */
hash = ip_vs_rs_hashkey(af, daddr, dport);
- rcu_read_lock();
hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
(dest->protocol == protocol || dest->vfwmark)) {
/* HIT */
- rcu_read_unlock();
return true;
}
}
- rcu_read_unlock();
return false;
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index fb780be76d15..3e17d32b629d 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -269,13 +269,11 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* hopefully it will succeed on the retransmitted
* packet.
*/
- rcu_read_lock();
mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
iph->ihl * 4,
start - data,
end - start,
buf, buf_len);
- rcu_read_unlock();
if (mangled) {
ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 3ffad4adaddf..e1efa446b305 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -38,7 +38,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
@@ -53,7 +52,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -67,11 +65,9 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -526,12 +522,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -544,11 +538,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);