summaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-09-06 12:45:26 -0700
committerDavid S. Miller <davem@davemloft.net>2016-09-06 12:45:26 -0700
commit60175ccdf46ac5076725cb3e66f6bc2e2766ad2b (patch)
treea5433388291a0151a00c54db596d2baf8ce649ab /net/netfilter
parent2f5281ba2a8feaf6f0aee93356f350855bb530fc (diff)
parent779994fa3636d46848edb402fe7517968e036e6f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for your net-next tree. Most relevant updates are the removal of per-conntrack timers to use a workqueue/garbage collection approach instead from Florian Westphal, the hash and numgen expression for nf_tables from Laura Garcia, updates on nf_tables hash set to honor the NLM_F_EXCL flag, removal of ip_conntrack sysctl and many other incremental updates on our Netfilter codebase. More specifically, they are: 1) Retrieve only 4 bytes to fetch ports in case of non-linear skb transport area in dccp, sctp, tcp, udp and udplite protocol conntrackers, from Gao Feng. 2) Missing whitespace on error message in physdev match, from Hangbin Liu. 3) Skip redundant IPv4 checksum calculation in nf_dup_ipv4, from Liping Zhang. 4) Add nf_ct_expires() helper function and use it, from Florian Westphal. 5) Replace opencoded nf_ct_kill() call in IPVS conntrack support, also from Florian. 6) Rename nf_tables set implementation to nft_set_{name}.c 7) Introduce the hash expression to allow arbitrary hashing of selector concatenations, from Laura Garcia Liebana. 8) Remove ip_conntrack sysctl backward compatibility code, this code has been around for long time already, and we have two interfaces to do this already: nf_conntrack sysctl and ctnetlink. 9) Use nf_conntrack_get_ht() helper function whenever possible, instead of opencoding fetch of hashtable pointer and size, patch from Liping Zhang. 10) Add quota expression for nf_tables. 11) Add number generator expression for nf_tables, this supports incremental and random generators that can be combined with maps, very useful for load balancing purpose, again from Laura Garcia Liebana. 12) Fix a typo in a debug message in FTP conntrack helper, from Colin Ian King. 13) Introduce a nft_chain_parse_hook() helper function to parse chain hook configuration, this is used by a follow up patch to perform better chain update validation. 14) Add rhashtable_lookup_get_insert_key() to rhashtable and use it from the nft_set_hash implementation to honor the NLM_F_EXCL flag. 15) Missing nulls check in nf_conntrack from nf_conntrack_tuple_taken(), patch from Florian Westphal. 16) Don't use the DYING bit to know if the conntrack event has been already delivered, instead a state variable to track event re-delivery states, also from Florian. 17) Remove the per-conntrack timer, use the workqueue approach that was discussed during the NFWS, from Florian Westphal. 18) Use the netlink conntrack table dump path to kill stale entries, again from Florian. 19) Add a garbage collector to get rid of stale conntracks, from Florian. 20) Reschedule garbage collector if eviction rate is high. 21) Get rid of the __nf_ct_kill_acct() helper. 22) Use ARPHRD_ETHER instead of hardcoded 1 from ARP logger. 23) Make nf_log_set() interface assertive on unsupported families. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Kconfig22
-rw-r--r--net/netfilter/Makefile7
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c7
-rw-r--r--net/netfilter/nf_conntrack_core.c229
-rw-r--r--net/netfilter/nf_conntrack_ecache.c22
-rw-r--r--net/netfilter/nf_conntrack_ftp.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c44
-rw-r--r--net/netfilter/nf_conntrack_pptp.c3
-rw-r--r--net/netfilter/nf_conntrack_proto.c81
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c39
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c89
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c131
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c53
-rw-r--r--net/netfilter/nf_conntrack_proto_udplite.c3
-rw-r--r--net/netfilter/nf_conntrack_standalone.c3
-rw-r--r--net/netfilter/nf_log.c8
-rw-r--r--net/netfilter/nf_nat_core.c6
-rw-r--r--net/netfilter/nf_tables_api.c203
-rw-r--r--net/netfilter/nft_hash.c417
-rw-r--r--net/netfilter/nft_numgen.c192
-rw-r--r--net/netfilter/nft_quota.c121
-rw-r--r--net/netfilter/nft_set_hash.c404
-rw-r--r--net/netfilter/nft_set_rbtree.c (renamed from net/netfilter/nft_rbtree.c)12
-rw-r--r--net/netfilter/xt_conntrack.c4
-rw-r--r--net/netfilter/xt_physdev.c4
26 files changed, 1186 insertions, 923 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9266ceebd112..e8d56d9a4df2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -474,6 +474,12 @@ config NFT_META
This option adds the "meta" expression that you can use to match and
to set packet metainformation such as the packet mark.
+config NFT_NUMGEN
+ tristate "Netfilter nf_tables number generator module"
+ help
+ This option adds the number generator expression used to perform
+ incremental counting and random numbers bound to a upper limit.
+
config NFT_CT
depends on NF_CONNTRACK
tristate "Netfilter nf_tables conntrack module"
@@ -481,13 +487,13 @@ config NFT_CT
This option adds the "meta" expression that you can use to match
connection tracking information such as the flow state.
-config NFT_RBTREE
+config NFT_SET_RBTREE
tristate "Netfilter nf_tables rbtree set module"
help
This option adds the "rbtree" set type (Red Black tree) that is used
to build interval-based sets.
-config NFT_HASH
+config NFT_SET_HASH
tristate "Netfilter nf_tables hash set module"
help
This option adds the "hash" set type that is used to build one-way
@@ -542,6 +548,12 @@ config NFT_QUEUE
This is required if you intend to use the userspace queueing
infrastructure (also known as NFQUEUE) from nftables.
+config NFT_QUOTA
+ tristate "Netfilter nf_tables quota module"
+ help
+ This option adds the "quota" expression that you can use to match
+ enforce bytes quotas.
+
config NFT_REJECT
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
@@ -563,6 +575,12 @@ config NFT_COMPAT
x_tables match/target extensions over the nf_tables
framework.
+config NFT_HASH
+ tristate "Netfilter nf_tables hash module"
+ help
+ This option adds the "hash" expression that you can use to perform
+ a hash operation on registers.
+
if NF_TABLES_NETDEV
config NF_DUP_NETDEV
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 69134541d65b..0c8581100ac6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -80,18 +80,21 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
obj-$(CONFIG_NFT_META) += nft_meta.o
+obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
+obj-$(CONFIG_NFT_QUOTA) += nft_quota.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
-obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
-obj-$(CONFIG_NFT_HASH) += nft_hash.o
+obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o
+obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
obj-$(CONFIG_NFT_REDIR) += nft_redir.o
+obj-$(CONFIG_NFT_HASH) += nft_hash.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index f04fd8df210b..fc230d99aa3b 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -281,13 +281,10 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
- /* Show what happens instead of calling nf_ct_kill() */
- if (del_timer(&ct->timeout)) {
- IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
+ if (nf_ct_kill(ct)) {
+ IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple="
FMT_TUPLE "\n",
__func__, ct, ARG_TUPLE(&tuple));
- if (ct->timeout.function)
- ct->timeout.function(ct->timeout.data);
} else {
IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
FMT_TUPLE "\n",
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index dd2c43abf9e2..ac1db4019d5c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -72,12 +72,24 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+struct conntrack_gc_work {
+ struct delayed_work dwork;
+ u32 last_bucket;
+ bool exiting;
+};
+
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
-static __read_mostly seqcount_t nf_conntrack_generation;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
+#define GC_MAX_BUCKETS_DIV 64u
+#define GC_MAX_BUCKETS 8192u
+#define GC_INTERVAL (5 * HZ)
+#define GC_MAX_EVICTS 256u
+
+static struct conntrack_gc_work conntrack_gc_work;
+
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
spin_lock(lock);
@@ -164,7 +176,7 @@ unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_max);
+seqcount_t nf_conntrack_generation __read_mostly;
DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
@@ -372,7 +384,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
pr_debug("destroy_conntrack(%p)\n", ct);
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
- NF_CT_ASSERT(!timer_pending(&ct->timeout));
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
@@ -435,35 +446,30 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
struct nf_conn_tstamp *tstamp;
+ if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
+ return false;
+
tstamp = nf_conn_tstamp_find(ct);
if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_get_real_ns();
- if (nf_ct_is_dying(ct))
- goto delete;
-
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
portid, report) < 0) {
- /* destroy event was not delivered */
+ /* destroy event was not delivered. nf_ct_put will
+ * be done by event cache worker on redelivery.
+ */
nf_ct_delete_from_lists(ct);
nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
return false;
}
nf_conntrack_ecache_work(nf_ct_net(ct));
- set_bit(IPS_DYING_BIT, &ct->status);
- delete:
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
return true;
}
EXPORT_SYMBOL_GPL(nf_ct_delete);
-static void death_by_timeout(unsigned long ul_conntrack)
-{
- nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
-}
-
static inline bool
nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
const struct nf_conntrack_tuple *tuple,
@@ -481,22 +487,17 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
net_eq(net, nf_ct_net(ct));
}
-/* must be called with rcu read lock held */
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+/* caller must hold rcu readlock and none of the nf_conntrack_locks */
+static void nf_ct_gc_expired(struct nf_conn *ct)
{
- struct hlist_nulls_head *hptr;
- unsigned int sequence, hsz;
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ return;
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hsz = nf_conntrack_htable_size;
- hptr = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ if (nf_ct_should_gc(ct))
+ nf_ct_kill(ct);
- *hash = hptr;
- *hsize = hsz;
+ nf_ct_put(ct);
}
-EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
/*
* Warning :
@@ -510,16 +511,24 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
- unsigned int bucket, sequence;
+ unsigned int bucket, hsize;
begin:
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- bucket = scale_hash(hash);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ bucket = reciprocal_scale(hash, hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+ struct nf_conn *ct;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(ct)) {
+ nf_ct_gc_expired(ct);
+ continue;
+ }
+
+ if (nf_ct_is_dying(ct))
+ continue;
+
if (nf_ct_key_equal(h, tuple, zone, net)) {
NF_CT_STAT_INC_ATOMIC(net, found);
return h;
@@ -618,7 +627,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
zone, net))
goto out;
- add_timer(&ct->timeout);
smp_wmb();
/* The caller holds a reference to this object */
atomic_set(&ct->ct_general.use, 2);
@@ -771,8 +779,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
- ct->timeout.expires += jiffies;
- add_timer(&ct->timeout);
+ ct->timeout += nfct_time_stamp;
atomic_inc(&ct->ct_general.use);
ct->status |= IPS_CONFIRMED;
@@ -823,29 +830,41 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
- unsigned int hash, sequence;
+ unsigned int hash, hsize;
struct hlist_nulls_node *n;
struct nf_conn *ct;
zone = nf_ct_zone(ignored_conntrack);
rcu_read_lock();
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hash = hash_conntrack(net, tuple);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ begin:
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ hash = __hash_conntrack(net, tuple, hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
- if (ct != ignored_conntrack &&
- nf_ct_key_equal(h, tuple, zone, net)) {
+
+ if (ct == ignored_conntrack)
+ continue;
+
+ if (nf_ct_is_expired(ct)) {
+ nf_ct_gc_expired(ct);
+ continue;
+ }
+
+ if (nf_ct_key_equal(h, tuple, zone, net)) {
NF_CT_STAT_INC_ATOMIC(net, found);
rcu_read_unlock();
return 1;
}
NF_CT_STAT_INC_ATOMIC(net, searched);
}
+
+ if (get_nulls_value(n) != hash) {
+ NF_CT_STAT_INC_ATOMIC(net, search_restart);
+ goto begin;
+ }
+
rcu_read_unlock();
return 0;
@@ -867,6 +886,11 @@ static unsigned int early_drop_list(struct net *net,
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ continue;
+ }
+
if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
!net_eq(nf_ct_net(tmp), net) ||
nf_ct_is_dying(tmp))
@@ -884,7 +908,6 @@ static unsigned int early_drop_list(struct net *net,
*/
if (net_eq(nf_ct_net(tmp), net) &&
nf_ct_is_confirmed(tmp) &&
- del_timer(&tmp->timeout) &&
nf_ct_delete(tmp, 0, 0))
drops++;
@@ -900,14 +923,11 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
struct hlist_nulls_head *ct_hash;
- unsigned hash, sequence, drops;
+ unsigned int hash, hsize, drops;
rcu_read_lock();
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hash = scale_hash(_hash++);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ hash = reciprocal_scale(_hash++, hsize);
drops = early_drop_list(net, &ct_hash[hash]);
rcu_read_unlock();
@@ -921,6 +941,69 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
return false;
}
+static void gc_worker(struct work_struct *work)
+{
+ unsigned int i, goal, buckets = 0, expired_count = 0;
+ unsigned long next_run = GC_INTERVAL;
+ unsigned int ratio, scanned = 0;
+ struct conntrack_gc_work *gc_work;
+
+ gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
+
+ goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+ i = gc_work->last_bucket;
+
+ do {
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
+ struct hlist_nulls_node *n;
+ unsigned int hashsz;
+ struct nf_conn *tmp;
+
+ i++;
+ rcu_read_lock();
+
+ nf_conntrack_get_ht(&ct_hash, &hashsz);
+ if (i >= hashsz)
+ i = 0;
+
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+
+ scanned++;
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ expired_count++;
+ continue;
+ }
+ }
+
+ /* could check get_nulls_value() here and restart if ct
+ * was moved to another chain. But given gc is best-effort
+ * we will just continue with next hash slot.
+ */
+ rcu_read_unlock();
+ cond_resched_rcu_qs();
+ } while (++buckets < goal &&
+ expired_count < GC_MAX_EVICTS);
+
+ if (gc_work->exiting)
+ return;
+
+ ratio = scanned ? expired_count * 100 / scanned : 0;
+ if (ratio >= 90)
+ next_run = 0;
+
+ gc_work->last_bucket = i;
+ schedule_delayed_work(&gc_work->dwork, next_run);
+}
+
+static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
+{
+ INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ gc_work->exiting = false;
+}
+
static struct nf_conn *
__nf_conntrack_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
@@ -957,8 +1040,6 @@ __nf_conntrack_alloc(struct net *net,
/* save hash for reusing when confirming */
*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
ct->status = 0;
- /* Don't set timer yet: wait for confirmation */
- setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
write_pnet(&ct->ct_net, net);
memset(&ct->__nfct_init_offset[0], 0,
offsetof(struct nf_conn, proto) -
@@ -1332,7 +1413,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
unsigned long extra_jiffies,
int do_acct)
{
- NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
NF_CT_ASSERT(skb);
/* Only update if this is not a fixed timeout */
@@ -1340,39 +1420,25 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
goto acct;
/* If not in hash table, timer will not be active yet */
- if (!nf_ct_is_confirmed(ct)) {
- ct->timeout.expires = extra_jiffies;
- } else {
- unsigned long newtime = jiffies + extra_jiffies;
-
- /* Only update the timeout if the new timeout is at least
- HZ jiffies from the old timeout. Need del_timer for race
- avoidance (may already be dying). */
- if (newtime - ct->timeout.expires >= HZ)
- mod_timer_pending(&ct->timeout, newtime);
- }
+ if (nf_ct_is_confirmed(ct))
+ extra_jiffies += nfct_time_stamp;
+ ct->timeout = extra_jiffies;
acct:
if (do_acct)
nf_ct_acct_update(ct, ctinfo, skb->len);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
-bool __nf_ct_kill_acct(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb,
- int do_acct)
+bool nf_ct_kill_acct(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb)
{
- if (do_acct)
- nf_ct_acct_update(ct, ctinfo, skb->len);
+ nf_ct_acct_update(ct, ctinfo, skb->len);
- if (del_timer(&ct->timeout)) {
- ct->timeout.function((unsigned long)ct);
- return true;
- }
- return false;
+ return nf_ct_delete(ct, 0, 0);
}
-EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -1505,11 +1571,8 @@ void nf_ct_iterate_cleanup(struct net *net,
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
- if (del_timer(&ct->timeout))
- nf_ct_delete(ct, portid, report);
-
- /* ... else the timer will get him soon. */
+ nf_ct_delete(ct, portid, report);
nf_ct_put(ct);
cond_resched();
}
@@ -1545,6 +1608,7 @@ static int untrack_refs(void)
void nf_conntrack_cleanup_start(void)
{
+ conntrack_gc_work.exiting = true;
RCU_INIT_POINTER(ip_ct_attach, NULL);
}
@@ -1554,6 +1618,7 @@ void nf_conntrack_cleanup_end(void)
while (untrack_refs() > 0)
schedule();
+ cancel_delayed_work_sync(&conntrack_gc_work.dwork);
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
nf_conntrack_proto_fini();
@@ -1828,6 +1893,10 @@ int nf_conntrack_init_start(void)
}
/* - and look it like as a confirmed connection */
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
+
+ conntrack_gc_work_init(&conntrack_gc_work);
+ schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+
return 0;
err_proto:
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index d28011b42845..da9df2d56e66 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -49,8 +49,13 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+ struct nf_conntrack_ecache *e;
- if (nf_ct_is_dying(ct))
+ if (!nf_ct_is_confirmed(ct))
+ continue;
+
+ e = nf_ct_ecache_find(ct);
+ if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
continue;
if (nf_conntrack_event(IPCT_DESTROY, ct)) {
@@ -58,8 +63,7 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
break;
}
- /* we've got the event delivered, now it's dying */
- set_bit(IPS_DYING_BIT, &ct->status);
+ e->state = NFCT_ECACHE_DESTROY_SENT;
refs[evicted] = ct;
if (++evicted >= ARRAY_SIZE(refs)) {
@@ -130,7 +134,7 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
if (!e)
goto out_unlock;
- if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
+ if (nf_ct_is_confirmed(ct)) {
struct nf_ct_event item = {
.ct = ct,
.portid = e->portid ? e->portid : portid,
@@ -150,11 +154,13 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
* triggered by a process, we store the PORTID
* to include it in the retransmission.
*/
- if (eventmask & (1 << IPCT_DESTROY) &&
- e->portid == 0 && portid != 0)
- e->portid = portid;
- else
+ if (eventmask & (1 << IPCT_DESTROY)) {
+ if (e->portid == 0 && portid != 0)
+ e->portid = portid;
+ e->state = NFCT_ECACHE_DESTROY_FAIL;
+ } else {
e->missed |= eventmask;
+ }
} else {
e->missed &= ~missed;
}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 43147005bea3..b6934b5edf7a 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -237,7 +237,7 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
}
delim = data[0];
if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) {
- pr_debug("try_eprt: invalid delimitter.\n");
+ pr_debug("try_eprt: invalid delimiter.\n");
return 0;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index fdfc71f416b7..c052b712c49f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -149,10 +149,7 @@ nla_put_failure:
static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
{
- long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ;
-
- if (timeout < 0)
- timeout = 0;
+ long timeout = nf_ct_expires(ct) / HZ;
if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout)))
goto nla_put_failure;
@@ -818,14 +815,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
- int res;
+ struct nf_conn *nf_ct_evict[8];
+ int res, i;
spinlock_t *lockp;
last = (struct nf_conn *)cb->args[1];
+ i = 0;
local_bh_disable();
for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
restart:
+ while (i) {
+ i--;
+ if (nf_ct_should_gc(nf_ct_evict[i]))
+ nf_ct_kill(nf_ct_evict[i]);
+ nf_ct_put(nf_ct_evict[i]);
+ }
+
lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
nf_conntrack_lock(lockp);
if (cb->args[0] >= nf_conntrack_htable_size) {
@@ -837,6 +843,13 @@ restart:
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(ct)) {
+ if (i < ARRAY_SIZE(nf_ct_evict) &&
+ atomic_inc_not_zero(&ct->ct_general.use))
+ nf_ct_evict[i++] = ct;
+ continue;
+ }
+
if (!net_eq(net, nf_ct_net(ct)))
continue;
@@ -878,6 +891,13 @@ out:
if (last)
nf_ct_put(last);
+ while (i) {
+ i--;
+ if (nf_ct_should_gc(nf_ct_evict[i]))
+ nf_ct_kill(nf_ct_evict[i]);
+ nf_ct_put(nf_ct_evict[i]);
+ }
+
return skb->len;
}
@@ -1147,9 +1167,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
}
}
- if (del_timer(&ct->timeout))
- nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
-
+ nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
nf_ct_put(ct);
return 0;
@@ -1517,11 +1535,10 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
{
u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
- if (!del_timer(&ct->timeout))
- return -ETIME;
+ ct->timeout = nfct_time_stamp + timeout * HZ;
- ct->timeout.expires = jiffies + timeout * HZ;
- add_timer(&ct->timeout);
+ if (test_bit(IPS_DYING_BIT, &ct->status))
+ return -ETIME;
return 0;
}
@@ -1719,9 +1736,8 @@ ctnetlink_create_conntrack(struct net *net,
if (!cda[CTA_TIMEOUT])
goto err1;
- ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
- ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
+ ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
rcu_read_lock();
if (cda[CTA_HELP]) {
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 5588c7ae1ac2..f60a4755d71e 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -157,8 +157,7 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
pr_debug("setting timeout of conntrack %p to 0\n", sibling);
sibling->proto.gre.timeout = 0;
sibling->proto.gre.stream_timeout = 0;
- if (del_timer(&sibling->timeout))
- sibling->timeout.function((unsigned long)sibling);
+ nf_ct_kill(sibling);
nf_ct_put(sibling);
return 1;
} else {
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index b65d5864b6d9..8d2c7d8c666a 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -159,54 +159,6 @@ static int kill_l4proto(struct nf_conn *i, void *data)
nf_ct_l3num(i) == l4proto->l3proto;
}
-static struct nf_ip_net *nf_ct_l3proto_net(struct net *net,
- struct nf_conntrack_l3proto *l3proto)
-{
- if (l3proto->l3proto == PF_INET)
- return &net->ct.nf_ct_proto;
- else
- return NULL;
-}
-
-static int nf_ct_l3proto_register_sysctl(struct net *net,
- struct nf_conntrack_l3proto *l3proto)
-{
- int err = 0;
- struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
- /* nf_conntrack_l3proto_ipv6 doesn't support sysctl */
- if (in == NULL)
- return 0;
-
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- if (in->ctl_table != NULL) {
- err = nf_ct_register_sysctl(net,
- &in->ctl_table_header,
- l3proto->ctl_table_path,
- in->ctl_table);
- if (err < 0) {
- kfree(in->ctl_table);
- in->ctl_table = NULL;
- }
- }
-#endif
- return err;
-}
-
-static void nf_ct_l3proto_unregister_sysctl(struct net *net,
- struct nf_conntrack_l3proto *l3proto)
-{
- struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
-
- if (in == NULL)
- return;
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- if (in->ctl_table_header != NULL)
- nf_ct_unregister_sysctl(&in->ctl_table_header,
- &in->ctl_table,
- 0);
-#endif
-}
-
int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
{
int ret = 0;
@@ -241,7 +193,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
int nf_ct_l3proto_pernet_register(struct net *net,
struct nf_conntrack_l3proto *proto)
{
- int ret = 0;
+ int ret;
if (proto->init_net) {
ret = proto->init_net(net);
@@ -249,7 +201,7 @@ int nf_ct_l3proto_pernet_register(struct net *net,
return ret;
}
- return nf_ct_l3proto_register_sysctl(net, proto);
+ return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
@@ -272,8 +224,6 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
void nf_ct_l3proto_pernet_unregister(struct net *net,
struct nf_conntrack_l3proto *proto)
{
- nf_ct_l3proto_unregister_sysctl(net, proto);
-
/* Remove all contrack entries for this protocol */
nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
}
@@ -312,26 +262,6 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
}
}
}
-#ifdef CONFIG_NF_CONNTRACK_PROC_COM