summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/nf_flowtable.txt112
-rw-r--r--include/linux/netfilter/nfnetlink_acct.h3
-rw-r--r--include/linux/netfilter/x_tables.h5
-rw-r--r--include/net/netfilter/nf_conntrack_count.h1
-rw-r--r--include/net/netfilter/nf_conntrack_helper.h3
-rw-r--r--include/net/netfilter/nf_tables.h33
-rw-r--r--include/net/netfilter/xt_rateest.h4
-rw-r--r--include/uapi/linux/netfilter/nf_conntrack_common.h1
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h12
-rw-r--r--include/uapi/linux/netfilter/nfnetlink_conntrack.h10
-rw-r--r--include/uapi/linux/netfilter/xt_connmark.h10
-rw-r--r--include/uapi/linux/netfilter_bridge/ebt_ip.h15
-rw-r--r--include/uapi/linux/netfilter_bridge/ebtables.h16
-rw-r--r--net/bridge/netfilter/Kconfig2
-rw-r--r--net/bridge/netfilter/Makefile1
-rw-r--r--net/bridge/netfilter/ebt_ip.c58
-rw-r--r--net/bridge/netfilter/ebt_stp.c6
-rw-r--r--net/bridge/netfilter/ebtables.c74
-rw-r--r--net/bridge/netfilter/nf_tables_bridge.c79
-rw-r--r--net/ipv4/netfilter/Kconfig4
-rw-r--r--net/ipv4/netfilter/Makefile2
-rw-r--r--net/ipv4/netfilter/arp_tables.c33
-rw-r--r--net/ipv4/netfilter/ip_tables.c31
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c8
-rw-r--r--net/ipv4/netfilter/ipt_ah.c2
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c58
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c67
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c20
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c6
-rw-r--r--net/ipv6/netfilter/Kconfig2
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c33
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c8
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c65
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c20
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c6
-rw-r--r--net/netfilter/Kconfig4
-rw-r--r--net/netfilter/Makefile9
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c7
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c4
-rw-r--r--net/netfilter/nf_conncount.c14
-rw-r--r--net/netfilter/nf_conntrack_acct.c6
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c1
-rw-r--r--net/netfilter/nf_conntrack_ecache.c6
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c5
-rw-r--r--net/netfilter/nf_conntrack_netlink.c92
-rw-r--r--net/netfilter/nf_conntrack_snmp.c5
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c6
-rw-r--r--net/netfilter/nf_nat_core.c4
-rw-r--r--net/netfilter/nf_nat_ftp.c7
-rw-r--r--net/netfilter/nf_nat_irc.c7
-rw-r--r--net/netfilter/nf_tables_api.c115
-rw-r--r--net/netfilter/nf_tables_inet.c75
-rw-r--r--net/netfilter/nf_tables_netdev.c142
-rw-r--r--net/netfilter/nfnetlink_acct.c3
-rw-r--r--net/netfilter/nfnetlink_cthelper.c25
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c26
-rw-r--r--net/netfilter/nfnetlink_queue.c9
-rw-r--r--net/netfilter/nft_chain_filter.c398
-rw-r--r--net/netfilter/nft_ct.c38
-rw-r--r--net/netfilter/nft_dynset.c5
-rw-r--r--net/netfilter/nft_lookup.c4
-rw-r--r--net/netfilter/nft_objref.c5
-rw-r--r--net/netfilter/x_tables.c191
-rw-r--r--net/netfilter/xt_RATEEST.c91
-rw-r--r--net/netfilter/xt_cluster.c10
-rw-r--r--net/netfilter/xt_connlimit.c4
-rw-r--r--net/netfilter/xt_connmark.c77
-rw-r--r--net/netfilter/xt_hashlimit.c3
-rw-r--r--net/netfilter/xt_limit.c2
-rw-r--r--net/netfilter/xt_nfacct.c2
-rw-r--r--net/netfilter/xt_rateest.c10
-rw-r--r--net/netfilter/xt_string.c1
-rw-r--r--net/netfilter/xt_time.c13
75 files changed, 1383 insertions, 858 deletions
diff --git a/Documentation/networking/nf_flowtable.txt b/Documentation/networking/nf_flowtable.txt
new file mode 100644
index 000000000000..54128c50d508
--- /dev/null
+++ b/Documentation/networking/nf_flowtable.txt
@@ -0,0 +1,112 @@
+Netfilter's flowtable infrastructure
+====================================
+
+This documentation describes the software flowtable infrastructure available in
+Netfilter since Linux kernel 4.16.
+
+Overview
+--------
+
+Initial packets follow the classic forwarding path. Once the flow enters the
+established state according to the conntrack semantics (ie. we have seen traffic
+in both directions), you can decide to offload the flow to the flowtable
+from the forward chain via the 'flow offload' action available in nftables.
+
+Packets that find an entry in the flowtable (ie. flowtable hit) are sent to the
+output netdevice via neigh_xmit(), hence, they bypass the classic forwarding
+path (the visible effect is that you do not see these packets from any of the
+netfilter hooks coming after the ingress). In case of flowtable miss, the packet
+follows the classic forward path.
+
+The flowtable uses a resizable hashtable, lookups are based on the following
+7-tuple selectors: source, destination, layer 3 and layer 4 protocols, source
+and destination ports and the input interface (useful in case there are several
+conntrack zones in place).
+
+Flowtables are populated via the 'flow offload' nftables action, so the user can
+selectively specify what flows are placed into the flow table. Hence, packets
+follow the classic forwarding path unless the user explicitly instructs packets
+to use this new alternative forwarding path via nftables policy.
+
+This is represented in Fig.1, which describes the classic forwarding path
+including the Netfilter hooks and the flowtable fastpath bypass.
+
+                                         userspace process
+                                          ^              |
+                                          |              |
+                                     _____|____     ____\/___
+                                    /          \   /         \
+                                    |   input   |  |  output  |
+                                    \__________/   \_________/
+                                         ^               |
+                                         |               |
+      _________      __________      ---------     _____\/_____
+     /         \    /          \     |Routing |   /            \
+  -->  ingress  ---> prerouting ---> |decision|   | postrouting |--> neigh_xmit
+     \_________/    \__________/     ----------   \____________/          ^
+       |      ^          |               |               ^                |
+   flowtable  |          |          ____\/___            |                |
+       |      |          |         /         \           |                |
+    __\/___   |          --------->| forward |------------                |
+    |-----|   |                    \_________/                            |
+    |-----|   |                 'flow offload' rule                       |
+    |-----|   |                   adds entry to                           |
+    |_____|   |                     flowtable                             |
+       |      |                                                           |
+      / \     |                                                           |
+     /hit\_no_|                                                           |
+     \ ? /                                                                |
+      \ /                                                                 |
+       |__yes_________________fastpath bypass ____________________________|
+
+               Fig.1 Netfilter hooks and flowtable interactions
+
+The flowtable entry also stores the NAT configuration, so all packets are
+mangled according to the NAT policy that matches the initial packets that went
+through the classic forwarding path. The TTL is decremented before calling
+neigh_xmit(). Fragmented traffic is passed up to follow the classic forwarding
+path given that the transport selectors are missing, therefore flowtable lookup
+is not possible.
+
+Example configuration
+---------------------
+
+Enabling the flowtable bypass is relatively easy: you only need to create a
+flowtable and add one rule to your forward chain.
+
+ table inet x {
+ flowtable f {
+ hook ingress priority 0 devices = { eth0, eth1 };
+ }
+ chain y {
+ type filter hook forward priority 0; policy accept;
+ ip protocol tcp flow offload @f
+ counter packets 0 bytes 0
+ }
+ }
+
+This example adds the flowtable 'f' to the ingress hook of the eth0 and eth1
+netdevices. You can create as many flowtables as you want in case you need to
+perform resource partitioning. The flowtable priority defines the order in which
+hooks are run in the pipeline; this is convenient in case you already have an
+nftables ingress chain (make sure the flowtable priority is smaller than the
+nftables ingress chain's, so that the flowtable runs first in the pipeline).
+
+The 'flow offload' action from the forward chain 'y' adds an entry to the
+flowtable for the TCP syn-ack packet coming in the reply direction. Once the
+flow is offloaded, you will observe that the counter rule in the example above
+does not get updated for the packets that are being forwarded through the
+forwarding bypass.
+
+More reading
+------------
+
+This documentation is based on the LWN.net articles [1][2]. Rafal Milecki also
+made a very complete and comprehensive summary called "A state of network
+acceleration" that describes how things were before this infrastructure was
+mainlined [3] and it also makes a rough summary of this work [4].
+
+[1] https://lwn.net/Articles/738214/
+[2] https://lwn.net/Articles/742164/
+[3] http://lists.infradead.org/pipermail/lede-dev/2018-January/010830.html
+[4] http://lists.infradead.org/pipermail/lede-dev/2018-January/010829.html
diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
index b4d741195c28..beee8bffe49e 100644
--- a/include/linux/netfilter/nfnetlink_acct.h
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -16,6 +16,5 @@ struct nf_acct;
struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name);
void nfnl_acct_put(struct nf_acct *acct);
void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
-int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
- struct nf_acct *nfacct);
+int nfnl_acct_overquota(struct net *net, struct nf_acct *nfacct);
#endif /* _NFNL_ACCT_H */
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 14529511c4b8..9077b3ebea08 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -281,6 +281,8 @@ int xt_check_entry_offsets(const void *base, const char *elems,
unsigned int target_offset,
unsigned int next_offset);
+int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks);
+
unsigned int *xt_alloc_entry_offsets(unsigned int size);
bool xt_find_jump_offset(const unsigned int *offsets,
unsigned int target, unsigned int size);
@@ -301,6 +303,7 @@ int xt_data_to_user(void __user *dst, const void *src,
void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
struct xt_counters_info *info, bool compat);
+struct xt_counters *xt_counters_alloc(unsigned int counters);
struct xt_table *xt_register_table(struct net *net,
const struct xt_table *table,
@@ -509,7 +512,7 @@ void xt_compat_unlock(u_int8_t af);
int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
void xt_compat_flush_offsets(u_int8_t af);
-void xt_compat_init_offsets(u_int8_t af, unsigned int number);
+int xt_compat_init_offsets(u8 af, unsigned int number);
int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
int xt_compat_match_offset(const struct xt_match *match);
diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index adf8db44cf86..e61184fbfb71 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -11,7 +11,6 @@ void nf_conncount_destroy(struct net *net, unsigned int family,
unsigned int nf_conncount_count(struct net *net,
struct nf_conncount_data *data,
const u32 *key,
- unsigned int family,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone);
#endif
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index fc39bbaf107c..32c2a94a219d 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -132,8 +132,7 @@ void nf_conntrack_helper_pernet_fini(struct net *net);
int nf_conntrack_helper_init(void);
void nf_conntrack_helper_fini(void);
-int nf_conntrack_broadcast_help(struct sk_buff *skb, unsigned int protoff,
- struct nf_conn *ct,
+int nf_conntrack_broadcast_help(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int timeout);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 663b015dace5..bd2a18d66189 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -434,11 +434,11 @@ static inline struct nft_set *nft_set_container_of(const void *priv)
return (void *)priv - offsetof(struct nft_set, data);
}
-struct nft_set *nft_set_lookup(const struct net *net,
- const struct nft_table *table,
- const struct nlattr *nla_set_name,
- const struct nlattr *nla_set_id,
- u8 genmask);
+struct nft_set *nft_set_lookup_global(const struct net *net,
+ const struct nft_table *table,
+ const struct nlattr *nla_set_name,
+ const struct nlattr *nla_set_id,
+ u8 genmask);
static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
{
@@ -868,7 +868,7 @@ struct nft_chain {
char *name;
};
-enum nft_chain_type {
+enum nft_chain_types {
NFT_CHAIN_T_DEFAULT = 0,
NFT_CHAIN_T_ROUTE,
NFT_CHAIN_T_NAT,
@@ -876,7 +876,7 @@ enum nft_chain_type {
};
/**
- * struct nf_chain_type - nf_tables chain type info
+ * struct nft_chain_type - nf_tables chain type info
*
* @name: name of the type
* @type: numeric identifier
@@ -884,18 +884,22 @@ enum nft_chain_type {
* @owner: module owner
* @hook_mask: mask of valid hooks
* @hooks: array of hook functions
+ * @init: chain initialization function
+ * @free: chain release function
*/
-struct nf_chain_type {
+struct nft_chain_type {
const char *name;
- enum nft_chain_type type;
+ enum nft_chain_types type;
int family;
struct module *owner;
unsigned int hook_mask;
nf_hookfn *hooks[NF_MAX_HOOKS];
+ int (*init)(struct nft_ctx *ctx);
+ void (*free)(struct nft_ctx *ctx);
};
int nft_chain_validate_dependency(const struct nft_chain *chain,
- enum nft_chain_type type);
+ enum nft_chain_types type);
int nft_chain_validate_hooks(const struct nft_chain *chain,
unsigned int hook_flags);
@@ -917,7 +921,7 @@ struct nft_stats {
*/
struct nft_base_chain {
struct nf_hook_ops ops;
- const struct nf_chain_type *type;
+ const struct nft_chain_type *type;
u8 policy;
u8 flags;
struct nft_stats __percpu *stats;
@@ -970,8 +974,8 @@ struct nft_table {
char *name;
};
-int nft_register_chain_type(const struct nf_chain_type *);
-void nft_unregister_chain_type(const struct nf_chain_type *);
+void nft_register_chain_type(const struct nft_chain_type *);
+void nft_unregister_chain_type(const struct nft_chain_type *);
int nft_register_expr(struct nft_expr_type *);
void nft_unregister_expr(struct nft_expr_type *);
@@ -1345,4 +1349,7 @@ struct nft_trans_flowtable {
#define nft_trans_flowtable(trans) \
(((struct nft_trans_flowtable *)trans->data)->flowtable)
+int __init nft_chain_filter_init(void);
+void __exit nft_chain_filter_fini(void);
+
#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index b1db13772554..832ab69efda5 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -21,7 +21,7 @@ struct xt_rateest {
struct net_rate_estimator __rcu *rate_est;
};
-struct xt_rateest *xt_rateest_lookup(const char *name);
-void xt_rateest_put(struct xt_rateest *est);
+struct xt_rateest *xt_rateest_lookup(struct net *net, const char *name);
+void xt_rateest_put(struct net *net, struct xt_rateest *est);
#endif /* _XT_RATEEST_H */
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 9574bd40870b..c712eb6879f1 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -129,6 +129,7 @@ enum ip_conntrack_events {
IPCT_NATSEQADJ = IPCT_SEQADJ,
IPCT_SECMARK, /* new security mark has been set */
IPCT_LABEL, /* new connlabel has been set */
+ IPCT_SYNPROXY, /* synproxy has been set */
#ifdef __KERNEL__
__IPCT_MAX
#endif
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 66dceee0ae30..6a3d653d5b27 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -909,8 +909,8 @@ enum nft_rt_attributes {
* @NFT_CT_EXPIRATION: relative conntrack expiration time in ms
* @NFT_CT_HELPER: connection tracking helper assigned to conntrack
* @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol
- * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address)
- * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address)
+ * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address, deprecated)
+ * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address, deprecated)
* @NFT_CT_PROTOCOL: conntrack layer 4 protocol
* @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source
* @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination
@@ -920,6 +920,10 @@ enum nft_rt_attributes {
* @NFT_CT_AVGPKT: conntrack average bytes per packet
* @NFT_CT_ZONE: conntrack zone
* @NFT_CT_EVENTMASK: ctnetlink events to be generated for this conntrack
+ * @NFT_CT_SRC_IP: conntrack layer 3 protocol source (IPv4 address)
+ * @NFT_CT_DST_IP: conntrack layer 3 protocol destination (IPv4 address)
+ * @NFT_CT_SRC_IP6: conntrack layer 3 protocol source (IPv6 address)
+ * @NFT_CT_DST_IP6: conntrack layer 3 protocol destination (IPv6 address)
*/
enum nft_ct_keys {
NFT_CT_STATE,
@@ -941,6 +945,10 @@ enum nft_ct_keys {
NFT_CT_AVGPKT,
NFT_CT_ZONE,
NFT_CT_EVENTMASK,
+ NFT_CT_SRC_IP,
+ NFT_CT_DST_IP,
+ NFT_CT_SRC_IP6,
+ NFT_CT_DST_IP6,
};
/**
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 7397e022ce6e..77987111cab0 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -54,6 +54,7 @@ enum ctattr_type {
CTA_MARK_MASK,
CTA_LABELS,
CTA_LABELS_MASK,
+ CTA_SYNPROXY,
__CTA_MAX
};
#define CTA_MAX (__CTA_MAX - 1)
@@ -190,6 +191,15 @@ enum ctattr_natseq {
};
#define CTA_NAT_SEQ_MAX (__CTA_NAT_SEQ_MAX - 1)
+enum ctattr_synproxy {
+ CTA_SYNPROXY_UNSPEC,
+ CTA_SYNPROXY_ISN,
+ CTA_SYNPROXY_ITS,
+ CTA_SYNPROXY_TSOFF,
+ __CTA_SYNPROXY_MAX,
+};
+#define CTA_SYNPROXY_MAX (__CTA_SYNPROXY_MAX - 1)
+
enum ctattr_expe