summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-05-22 12:06:23 -0400
committerDavid S. Miller <davem@davemloft.net>2014-05-22 12:06:23 -0400
commit8af750d739620a0028dc767b289b0ed1d61fb38b (patch)
treeb3917c2333e1e9e54106e45ab8bbe7c13daf70d5
parent758bd61aa987e82765bd432f37bd81bd197c4b1a (diff)
parentc7c32e72cbe23cea97c5d87ffcf6e23cc1ec1a65 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nftables
Pablo Neira Ayuso says: ==================== Netfilter/nftables updates for net-next The following patchset contains Netfilter/nftables updates for net-next, most relevantly they are: 1) Add set element update notification via netlink, from Arturo Borrero. 2) Put all object updates in one single message batch that is sent to kernel-space. Before this patch only rules where included in the batch. This series also introduces the generic transaction infrastructure so updates to all objects (tables, chains, rules and sets) are applied in an all-or-nothing fashion, these series from me. 3) Defer release of objects via call_rcu to reduce the time required to commit changes. The assumption is that all objects are destroyed in reverse order to ensure that dependencies betweem them are fulfilled (ie. rules and sets are destroyed first, then chains, and finally tables). 4) Allow to match by bridge port name, from Tomasz Bursztyka. This series include two patches to prepare this new feature. 5) Implement the proper set selection based on the characteristics of the data. The new infrastructure also allows you to specify your preferences in terms of memory and computational complexity so the underlying set type is also selected according to your needs, from Patrick McHardy. 6) Several cleanup patches for nft expressions, including one minor possible compilation breakage due to missing mark support, also from Patrick. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/netfilter/nf_tables.h130
-rw-r--r--include/net/netfilter/nft_meta.h36
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h37
-rw-r--r--net/bridge/Makefile2
-rw-r--r--net/bridge/netfilter/Kconfig14
-rw-r--r--net/bridge/netfilter/Makefile1
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c139
-rw-r--r--net/netfilter/nf_tables_api.c1236
-rw-r--r--net/netfilter/nft_ct.c96
-rw-r--r--net/netfilter/nft_hash.c54
-rw-r--r--net/netfilter/nft_lookup.c10
-rw-r--r--net/netfilter/nft_meta.c103
-rw-r--r--net/netfilter/nft_rbtree.c21
13 files changed, 1407 insertions, 472 deletions
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e6bc14d8fa9a..7ee6ce6564ae 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -72,21 +72,23 @@ static inline void nft_data_debug(const struct nft_data *data)
* struct nft_ctx - nf_tables rule/set context
*
* @net: net namespace
- * @skb: netlink skb
- * @nlh: netlink message header
* @afi: address family info
* @table: the table the chain is contained in
* @chain: the chain the rule is contained in
* @nla: netlink attributes
+ * @portid: netlink portID of the original message
+ * @seq: netlink sequence number
+ * @report: notify via unicast netlink message
*/
struct nft_ctx {
struct net *net;
- const struct sk_buff *skb;
- const struct nlmsghdr *nlh;
- const struct nft_af_info *afi;
- const struct nft_table *table;
- const struct nft_chain *chain;
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_chain *chain;
const struct nlattr * const *nla;
+ u32 portid;
+ u32 seq;
+ bool report;
};
struct nft_data_desc {
@@ -146,6 +148,44 @@ struct nft_set_iter {
};
/**
+ * struct nft_set_desc - description of set elements
+ *
+ * @klen: key length
+ * @dlen: data length
+ * @size: number of set elements
+ */
+struct nft_set_desc {
+ unsigned int klen;
+ unsigned int dlen;
+ unsigned int size;
+};
+
+/**
+ * enum nft_set_class - performance class
+ *
+ * @NFT_LOOKUP_O_1: constant, O(1)
+ * @NFT_LOOKUP_O_LOG_N: logarithmic, O(log N)
+ * @NFT_LOOKUP_O_N: linear, O(N)
+ */
+enum nft_set_class {
+ NFT_SET_CLASS_O_1,
+ NFT_SET_CLASS_O_LOG_N,
+ NFT_SET_CLASS_O_N,
+};
+
+/**
+ * struct nft_set_estimate - estimation of memory and performance
+ * characteristics
+ *
+ * @size: required memory
+ * @class: lookup performance class
+ */
+struct nft_set_estimate {
+ unsigned int size;
+ enum nft_set_class class;
+};
+
+/**
* struct nft_set_ops - nf_tables set operations
*
* @lookup: look up an element within the set
@@ -174,7 +214,11 @@ struct nft_set_ops {
struct nft_set_iter *iter);
unsigned int (*privsize)(const struct nlattr * const nla[]);
+ bool (*estimate)(const struct nft_set_desc *desc,
+ u32 features,
+ struct nft_set_estimate *est);
int (*init)(const struct nft_set *set,
+ const struct nft_set_desc *desc,
const struct nlattr * const nla[]);
void (*destroy)(const struct nft_set *set);
@@ -194,6 +238,8 @@ void nft_unregister_set(struct nft_set_ops *ops);
* @name: name of the set
* @ktype: key type (numeric type defined by userspace, not used in the kernel)
* @dtype: data type (verdict or numeric type defined by userspace)
+ * @size: maximum set size
+ * @nelems: number of elements
* @ops: set ops
* @flags: set flags
* @klen: key length
@@ -206,6 +252,8 @@ struct nft_set {
char name[IFNAMSIZ];
u32 ktype;
u32 dtype;
+ u32 size;
+ u32 nelems;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
u16 flags;
@@ -222,6 +270,8 @@ static inline void *nft_set_priv(const struct nft_set *set)
struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
const struct nlattr *nla);
+struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+ const struct nlattr *nla);
/**
* struct nft_set_binding - nf_tables set binding
@@ -341,18 +391,75 @@ struct nft_rule {
};
/**
- * struct nft_rule_trans - nf_tables rule update in transaction
+ * struct nft_trans - nf_tables object update in transaction
*
+ * @rcu_head: rcu head to defer release of transaction data
* @list: used internally
- * @ctx: rule context
- * @rule: rule that needs to be updated
+ * @msg_type: message type
+ * @ctx: transaction context
+ * @data: internal information related to the transaction
*/
-struct nft_rule_trans {
+struct nft_trans {
+ struct rcu_head rcu_head;
struct list_head list;
+ int msg_type;
struct nft_ctx ctx;
+ char data[0];
+};
+
+struct nft_trans_rule {
struct nft_rule *rule;
};
+#define nft_trans_rule(trans) \
+ (((struct nft_trans_rule *)trans->data)->rule)
+
+struct nft_trans_set {
+ struct nft_set *set;
+ u32 set_id;
+};
+
+#define nft_trans_set(trans) \
+ (((struct nft_trans_set *)trans->data)->set)
+#define nft_trans_set_id(trans) \
+ (((struct nft_trans_set *)trans->data)->set_id)
+
+struct nft_trans_chain {
+ bool update;
+ char name[NFT_CHAIN_MAXNAMELEN];
+ struct nft_stats __percpu *stats;
+ u8 policy;
+};
+
+#define nft_trans_chain_update(trans) \
+ (((struct nft_trans_chain *)trans->data)->update)
+#define nft_trans_chain_name(trans) \
+ (((struct nft_trans_chain *)trans->data)->name)
+#define nft_trans_chain_stats(trans) \
+ (((struct nft_trans_chain *)trans->data)->stats)
+#define nft_trans_chain_policy(trans) \
+ (((struct nft_trans_chain *)trans->data)->policy)
+
+struct nft_trans_table {
+ bool update;
+ bool enable;
+};
+
+#define nft_trans_table_update(trans) \
+ (((struct nft_trans_table *)trans->data)->update)
+#define nft_trans_table_enable(trans) \
+ (((struct nft_trans_table *)trans->data)->enable)
+
+struct nft_trans_elem {
+ struct nft_set *set;
+ struct nft_set_elem elem;
+};
+
+#define nft_trans_elem_set(trans) \
+ (((struct nft_trans_elem *)trans->data)->set)
+#define nft_trans_elem(trans) \
+ (((struct nft_trans_elem *)trans->data)->elem)
+
static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
{
return (struct nft_expr *)&rule->data[0];
@@ -385,6 +492,7 @@ static inline void *nft_userdata(const struct nft_rule *rule)
enum nft_chain_flags {
NFT_BASE_CHAIN = 0x1,
+ NFT_CHAIN_INACTIVE = 0x2,
};
/**
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
new file mode 100644
index 000000000000..0ee47c3e2e31
--- /dev/null
+++ b/include/net/netfilter/nft_meta.h
@@ -0,0 +1,36 @@
+#ifndef _NFT_META_H_
+#define _NFT_META_H_
+
+struct nft_meta {
+ enum nft_meta_keys key:8;
+ union {
+ enum nft_registers dreg:8;
+ enum nft_registers sreg:8;
+ };
+};
+
+extern const struct nla_policy nft_meta_policy[];
+
+int nft_meta_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[]);
+
+int nft_meta_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[]);
+
+int nft_meta_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr);
+
+int nft_meta_set_dump(struct sk_buff *skb,
+ const struct nft_expr *expr);
+
+void nft_meta_get_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt);
+
+void nft_meta_set_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt);
+
+#endif
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c88ccbfda5f1..2a88f645a5d8 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -212,6 +212,29 @@ enum nft_set_flags {
};
/**
+ * enum nft_set_policies - set selection policy
+ *
+ * @NFT_SET_POL_PERFORMANCE: prefer high performance over low memory use
+ * @NFT_SET_POL_MEMORY: prefer low memory use over high performance
+ */
+enum nft_set_policies {
+ NFT_SET_POL_PERFORMANCE,
+ NFT_SET_POL_MEMORY,
+};
+
+/**
+ * enum nft_set_desc_attributes - set element description
+ *
+ * @NFTA_SET_DESC_SIZE: number of elements in set (NLA_U32)
+ */
+enum nft_set_desc_attributes {
+ NFTA_SET_DESC_UNSPEC,
+ NFTA_SET_DESC_SIZE,
+ __NFTA_SET_DESC_MAX
+};
+#define NFTA_SET_DESC_MAX (__NFTA_SET_DESC_MAX - 1)
+
+/**
* enum nft_set_attributes - nf_tables set netlink attributes
*
* @NFTA_SET_TABLE: table name (NLA_STRING)
@@ -221,6 +244,9 @@ enum nft_set_flags {
* @NFTA_SET_KEY_LEN: key data length (NLA_U32)
* @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
* @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
+ * @NFTA_SET_POLICY: selection policy (NLA_U32)
+ * @NFTA_SET_DESC: set description (NLA_NESTED)
+ * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
*/
enum nft_set_attributes {
NFTA_SET_UNSPEC,
@@ -231,6 +257,9 @@ enum nft_set_attributes {
NFTA_SET_KEY_LEN,
NFTA_SET_DATA_TYPE,
NFTA_SET_DATA_LEN,
+ NFTA_SET_POLICY,
+ NFTA_SET_DESC,
+ NFTA_SET_ID,
__NFTA_SET_MAX
};
#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)
@@ -266,12 +295,14 @@ enum nft_set_elem_attributes {
* @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING)
* @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING)
* @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes)
+ * @NFTA_SET_ELEM_LIST_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
*/
enum nft_set_elem_list_attributes {
NFTA_SET_ELEM_LIST_UNSPEC,
NFTA_SET_ELEM_LIST_TABLE,
NFTA_SET_ELEM_LIST_SET,
NFTA_SET_ELEM_LIST_ELEMENTS,
+ NFTA_SET_ELEM_LIST_SET_ID,
__NFTA_SET_ELEM_LIST_MAX
};
#define NFTA_SET_ELEM_LIST_MAX (__NFTA_SET_ELEM_LIST_MAX - 1)
@@ -457,12 +488,14 @@ enum nft_cmp_attributes {
* @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING)
* @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers)
* @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_LOOKUP_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
*/
enum nft_lookup_attributes {
NFTA_LOOKUP_UNSPEC,
NFTA_LOOKUP_SET,
NFTA_LOOKUP_SREG,
NFTA_LOOKUP_DREG,
+ NFTA_LOOKUP_SET_ID,
__NFTA_LOOKUP_MAX
};
#define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1)
@@ -536,6 +569,8 @@ enum nft_exthdr_attributes {
* @NFT_META_SECMARK: packet secmark (skb->secmark)
* @NFT_META_NFPROTO: netfilter protocol
* @NFT_META_L4PROTO: layer 4 protocol number
+ * @NFT_META_BRI_IIFNAME: packet input bridge interface name
+ * @NFT_META_BRI_OIFNAME: packet output bridge interface name
*/
enum nft_meta_keys {
NFT_META_LEN,
@@ -555,6 +590,8 @@ enum nft_meta_keys {
NFT_META_SECMARK,
NFT_META_NFPROTO,
NFT_META_L4PROTO,
+ NFT_META_BRI_IIFNAME,
+ NFT_META_BRI_OIFNAME,
};
/**
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index e85498b2f166..906a18b4e74a 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -16,4 +16,4 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o
-obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/
+obj-$(CONFIG_BRIDGE_NETFILTER) += netfilter/
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 5ca74a0e595f..3baf29d34e62 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -2,13 +2,25 @@
# Bridge netfilter configuration
#
#
-config NF_TABLES_BRIDGE
+menuconfig NF_TABLES_BRIDGE
depends on NF_TABLES
+ select BRIDGE_NETFILTER
tristate "Ethernet Bridge nf_tables support"
+if NF_TABLES_BRIDGE
+
+config NFT_BRIDGE_META
+ tristate "Netfilter nf_table bridge meta support"
+ depends on NFT_META
+ help
+ Add support for bridge dedicated meta key.
+
+endif # NF_TABLES_BRIDGE
+
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
depends on BRIDGE && NETFILTER
+ select BRIDGE_NETFILTER
select NETFILTER_XTABLES
help
ebtables is a general, extensible frame/packet identification
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index ea7629f58b3d..6f2f3943d66f 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -3,6 +3,7 @@
#
obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
+obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o
obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
new file mode 100644
index 000000000000..4f02109d708f
--- /dev/null
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
+
+#include "../br_private.h"
+
+static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+ const struct net_device *in = pkt->in, *out = pkt->out;
+ struct nft_data *dest = &data[priv->dreg];
+ const struct net_bridge_port *p;
+
+ switch (priv->key) {
+ case NFT_META_BRI_IIFNAME:
+ if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
+ goto err;
+ break;
+ case NFT_META_BRI_OIFNAME:
+ if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
+ goto err;
+ break;
+ default:
+ goto out;
+ }
+
+ strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data));
+ return;
+out:
+ return nft_meta_get_eval(expr, data, pkt);
+err:
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_meta *priv = nft_expr_priv(expr);
+ int err;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (priv->key) {
+ case NFT_META_BRI_IIFNAME:
+ case NFT_META_BRI_OIFNAME:
+ break;
+ default:
+ return nft_meta_get_init(ctx, expr, tb);
+ }
+
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static struct nft_expr_type nft_meta_bridge_type;
+static const struct nft_expr_ops nft_meta_bridge_get_ops = {
+ .type = &nft_meta_bridge_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+ .eval = nft_meta_bridge_get_eval,
+ .init = nft_meta_bridge_get_init,
+ .dump = nft_meta_get_dump,
+};
+
+static const struct nft_expr_ops nft_meta_bridge_set_ops = {
+ .type = &nft_meta_bridge_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+ .eval = nft_meta_set_eval,
+ .init = nft_meta_set_init,
+ .dump = nft_meta_set_dump,
+};
+
+static const struct nft_expr_ops *
+nft_meta_bridge_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_META_KEY] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
+ return ERR_PTR(-EINVAL);
+
+ if (tb[NFTA_META_DREG])
+ return &nft_meta_bridge_get_ops;
+
+ if (tb[NFTA_META_SREG])
+ return &nft_meta_bridge_set_ops;
+
+ return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_meta_bridge_type __read_mostly = {
+ .family = NFPROTO_BRIDGE,
+ .name = "meta",
+ .select_ops = &nft_meta_bridge_select_ops,
+ .policy = nft_meta_policy,
+ .maxattr = NFTA_META_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_meta_bridge_module_init(void)
+{
+ return nft_register_expr(&nft_meta_bridge_type);
+}
+
+static void __exit nft_meta_bridge_module_exit(void)
+{
+ nft_unregister_expr(&nft_meta_bridge_type);
+}
+
+module_init(nft_meta_bridge_module_init);
+module_exit(nft_meta_bridge_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3fd159db9f06..047884776586 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -88,6 +88,45 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
return ERR_PTR(-EAFNOSUPPORT);
}
+static void nft_ctx_init(struct nft_ctx *ctx,
+ const struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct nft_af_info *afi,
+ struct nft_table *table,
+ struct nft_chain *chain,
+ const struct nlattr * const *nla)
+{
+ ctx->net = sock_net(skb->sk);
+ ctx->afi = afi;
+ ctx->table = table;
+ ctx->chain = chain;
+ ctx->nla = nla;
+ ctx->portid = NETLINK_CB(skb).portid;
+ ctx->report = nlmsg_report(nlh);
+ ctx->seq = nlh->nlmsg_seq;
+}
+
+static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
+ u32 size)
+{
+ struct nft_trans *trans;
+
+ trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL);
+ if (trans == NULL)
+ return NULL;
+
+ trans->msg_type = msg_type;
+ trans->ctx = *ctx;
+
+ return trans;
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+ list_del(&trans->list);
+ kfree(trans);
+}
+
/*
* Tables
*/
@@ -197,20 +236,13 @@ nla_put_failure:
return -1;
}
-static int nf_tables_table_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- int event, int family)
+static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- u32 seq = nlh ? nlh->nlmsg_seq : 0;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- bool report;
int err;
- report = nlh ? nlmsg_report(nlh) : false;
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -218,18 +250,20 @@ static int nf_tables_table_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
- family, table);
+ err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -269,6 +303,9 @@ done:
return skb->len;
}
+/* Internal table flags */
+#define NFT_TABLE_INACTIVE (1 << 15)
+
static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -295,6 +332,8 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -343,7 +382,7 @@ err:
return err;
}
-static int nf_tables_table_disable(const struct nft_af_info *afi,
+static void nf_tables_table_disable(const struct nft_af_info *afi,
struct nft_table *table)
{
struct nft_chain *chain;
@@ -353,45 +392,63 @@ static int nf_tables_table_disable(const struct nft_af_info *afi,
nf_unregister_hooks(nft_base_chain(chain)->ops,
afi->nops);
}
-
- return 0;
}
-static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct nft_af_info *afi, struct nft_table *table)
+static int nf_tables_updtable(struct nft_ctx *ctx)
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- int family = nfmsg->nfgen_family, ret = 0;
+ struct nft_trans *trans;
+ u32 flags;
+ int ret = 0;
- if (nla[NFTA_TABLE_FLAGS]) {
- u32 flags;
+ if (!ctx->nla[NFTA_TABLE_FLAGS])
+ return 0;
- flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
- if (flags & ~NFT_TABLE_F_DORMANT)
- return -EINVAL;
+ flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS]));
+ if (flags & ~NFT_TABLE_F_DORMANT)
+ return -EINVAL;
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
+ sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
- if ((flags & NFT_TABLE_F_DORMANT) &&
- !(table->flags & NFT_TABLE_F_DORMANT)) {
- ret = nf_tables_table_disable(afi, table);
- if (ret >= 0)
- table->flags |= NFT_TABLE_F_DORMANT;
- } else if (!(flags & NFT_TABLE_F_DORMANT) &&
- table->flags & NFT_TABLE_F_DORMANT) {
- ret = nf_tables_table_enable(afi, table);
- if (ret >= 0)
- table->flags &= ~NFT_TABLE_F_DORMANT;
+ if ((flags & NFT_TABLE_F_DORMANT) &&
+ !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_trans_table_enable(trans) = false;
+ } else if (!(flags & NFT_TABLE_F_DORMANT) &&
+ ctx->table->flags & NFT_TABLE_F_DORMANT) {
+ ret = nf_tables_table_enable(ctx->afi, ctx->table);
+ if (ret >= 0) {
+ ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+ nft_trans_table_enable(trans) = true;
}
- if (ret < 0)
- goto err;
}
+ if (ret < 0)
+ goto err;
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+ nft_trans_table_update(trans) = true;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
err:
+ nft_trans_destroy(trans);
return ret;
}
+static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWTABLE)
+ ctx->table->flags |= NFT_TABLE_INACTIVE;
+
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
+}
+
static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -403,6 +460,8 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
u32 flags = 0;
+ struct nft_ctx ctx;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, true);
if (IS_ERR(afi))
@@ -417,11 +476,15 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
}
if (table != NULL) {
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table);
+
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ return nf_tables_updtable(&ctx);
}
if (nla[NFTA_TABLE_FLAGS]) {
@@ -444,8 +507,14 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
+ if (err < 0) {
+ kfree(table);
+ module_put(afi->owner);
+ return err;
+ }
list_add_tail(&table->list, &afi->tables);
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
return 0;
}
@@ -457,7 +526,8 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
struct nft_af_info *afi;
struct nft_table *table;
struct net *net = sock_net(skb->sk);
- int family = nfmsg->nfgen_family;
+ int family = nfmsg->nfgen_family, err;
+ struct nft_ctx ctx;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -466,17 +536,27 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
if (!list_empty(&table->chains) || !list_empty(&table->sets))
return -EBUSY;
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE);
+ if (err < 0)
+ return err;
+
list_del(&table->list);
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
- kfree(table);
- module_put(afi->owner);
return 0;
}
+static void nf_tables_table_destroy(struct nft_ctx *ctx)
+{
+ kfree(ctx->table);
+ module_put(ctx->afi->owner);
+}
+
int nft_register_chain_type(const struct nf_chain_type *ctype)
{
int err = 0;
@@ -541,7 +621,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_CHAIN_HOOK] = { .type = NLA_NESTED },
[NFTA_CHAIN_POLICY] = { .type = NLA_U32 },
- [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING },
+ [NFTA_CHAIN_TYPE] = { .type = NLA_STRING },
[NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
};
@@ -637,21 +717,13 @@ nla_put_failure:
return -1;
}
-static int nf_tables_chain_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- const struct nft_chain *chain,
- int event, int family)
+static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- u32 seq = nlh ? nlh->nlmsg_seq : 0;
- bool report;
int err;
- report = nlh ? nlmsg_report(nlh) : false;
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -659,18 +731,21 @@ static int nf_tables_chain_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
- table, chain);
+ err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table,
+ ctx->chain);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -740,10 +815,14 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
if (IS_ERR(chain))
return PTR_ERR(chain);
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -767,8 +846,7 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
-static int
-nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
+static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
{
struct nlattr *tb[NFTA_COUNTER_MAX+1];
struct nft_stats __percpu *newstats;
@@ -777,14 +855,14 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
if (err < 0)
- return err;
+ return ERR_PTR(err);
if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
newstats = alloc_percpu(struct nft_stats);
if (newstats == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
/* Restore old counters on this cpu, no problem. Per-cpu statistics
* are not exposed to userspace.
@@ -793,6 +871,12 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ return newstats;
+}
+
+static void nft_chain_stats_replace(struct nft_base_chain *chain,
+ struct nft_stats __percpu *newstats)
+{
if (chain->stats) {
struct nft_stats __percpu *oldstats =
nft_dereference(chain->stats);
@@ -802,17 +886,43 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
free_percpu(oldstats);
} else
rcu_assign_pointer(chain->stats, newstats);
+}
+
+static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWCHAIN)
+ ctx->chain->flags |= NFT_CHAIN_INACTIVE;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
}
+static void nf_tables_chain_destroy(struct nft_chain *chain)
+{
+ BUG_ON(chain->use > 0);
+
+ if (chain->flags & NFT_BASE_CHAIN) {
+ module_put(nft_base_chain(chain)->type->owner);
+ free_percpu(nft_base_chain(chain)->stats);
+ kfree(nft_base_chain(chai