summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-04-09 14:46:04 -0400
committerDavid S. Miller <davem@davemloft.net>2015-04-09 14:46:04 -0400
commitca69d7102fde3e22b09536867ba14ace84ea80e1 (patch)
tree6cfba19b3885c0ce11df838cdda53cb556e2c46f /net
parent3ab1a30fbded99936956442d8cf8f379064e4a26 (diff)
parentaadd51aa71f8d013c818a312bb2a0c5714830dbc (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for your net-next tree. They are: * nf_tables set timeout infrastructure from Patrick McHardy. 1) Add set timeout support. 2) Add support for set element timeouts using the new set extension infrastructure. 3) Add garbage collection helper functions to get rid of stale elements. Elements are accumulated in a batch that is asynchronously released via RCU when the batch is full. 4) Add garbage collection synchronization helpers. This introduces a new element busy bit to address concurrent access from the netlink API and the garbage collector. 5) Add timeout support for the nft_hash set implementation. The garbage collector periodically checks for stale elements from the workqueue. * iptables/nftables cgroup fixes: 6) Ignore non full-socket objects from the input path, otherwise cgroup match may crash, from Daniel Borkmann. 7) Fix cgroup in nf_tables. 8) Save some cycles from xt_socket by skipping packet header parsing when skb->sk is already set because of early demux. Also from Daniel. * br_netfilter updates from Florian Westphal. 9) Save frag_max_size and restore it from the forward path too. 10) Use a per-cpu area to restore the original source MAC address when traffic is DNAT'ed. 11) Add helper functions to access physical devices. 12) Use these new physdev helper functions from xt_physdev. 13) Add another nf_bridge_info_get() helper function to fetch the br_netfilter state information. 14) Annotate original layer 2 protocol number in nf_bridge info, instead of using kludgy flags. 15) Also annotate the pkttype mangling when the packet travels back and forth from the IP to the bridge layer, instead of using a flag. * More nf_tables set enhancements from Patrick: 16) Fix possible usage of set variant that doesn't support timeouts. 
17) Avoid spurious "set is full" errors from Netlink API when there are pending stale elements scheduled to be released. 18) Restrict loop checks to set maps. 19) Add support for dynamic set updates from the packet path. 20) Add support to store optional user data (e.g. comments) per set element. BTW, I have also pulled net-next into nf-next to anticipate the conflict resolution between your okfn() signature changes and Florian's br_netfilter updates. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/bridge/br_netfilter.c144
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c4
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c4
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c32
-rw-r--r--net/netfilter/nf_log_common.c5
-rw-r--r--net/netfilter/nf_queue.c18
-rw-r--r--net/netfilter/nf_tables_api.c186
-rw-r--r--net/netfilter/nf_tables_core.c7
-rw-r--r--net/netfilter/nfnetlink_log.c17
-rw-r--r--net/netfilter/nfnetlink_queue_core.c28
-rw-r--r--net/netfilter/nft_dynset.c218
-rw-r--r--net/netfilter/nft_hash.c117
-rw-r--r--net/netfilter/nft_lookup.c2
-rw-r--r--net/netfilter/nft_meta.c5
-rw-r--r--net/netfilter/xt_cgroup.c2
-rw-r--r--net/netfilter/xt_physdev.c34
-rw-r--r--net/netfilter/xt_socket.c95
18 files changed, 747 insertions, 173 deletions
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index acd31c9f2116..ab55e2472beb 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -111,6 +111,24 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
pppoe_proto(skb) == htons(PPP_IPV6) && \
brnf_filter_pppoe_tagged)
+/* largest possible L2 header, see br_nf_dev_queue_xmit() */
+#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
+
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+struct brnf_frag_data {
+ char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
+ u8 encap_size;
+ u8 size;
+};
+
+static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
+#endif
+
+static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb)
+{
+ return skb->nf_bridge;
+}
+
static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
{
struct net_bridge_port *port;
@@ -189,14 +207,6 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
skb->network_header += len;
}
-static inline void nf_bridge_save_header(struct sk_buff *skb)
-{
- int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
-
- skb_copy_from_linear_data_offset(skb, -header_size,
- skb->nf_bridge->data, header_size);
-}
-
/* When handing a packet over to the IP layer
* check whether we have a skb that is in the
* expected format
@@ -252,10 +262,16 @@ drop:
static void nf_bridge_update_protocol(struct sk_buff *skb)
{
- if (skb->nf_bridge->mask & BRNF_8021Q)
+ switch (skb->nf_bridge->orig_proto) {
+ case BRNF_PROTO_8021Q:
skb->protocol = htons(ETH_P_8021Q);
- else if (skb->nf_bridge->mask & BRNF_PPPoE)
+ break;
+ case BRNF_PROTO_PPPOE:
skb->protocol = htons(ETH_P_PPP_SES);
+ break;
+ case BRNF_PROTO_UNCHANGED:
+ break;
+ }
}
/* PF_BRIDGE/PRE_ROUTING *********************************************/
@@ -263,12 +279,12 @@ static void nf_bridge_update_protocol(struct sk_buff *skb)
* bridge PRE_ROUTING hook. */
static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
- if (nf_bridge->mask & BRNF_PKT_TYPE) {
+ if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->mask ^= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = false;
}
nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
@@ -296,7 +312,6 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
*/
static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
struct neighbour *neigh;
struct dst_entry *dst;
@@ -306,6 +321,7 @@ static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
dst = skb_dst(skb);
neigh = dst_neigh_lookup_skb(dst, skb);
if (neigh) {
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
int ret;
if (neigh->hh.hh_len) {
@@ -319,7 +335,7 @@ static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
*/
skb_copy_from_linear_data_offset(skb,
-(ETH_HLEN-ETH_ALEN),
- skb->nf_bridge->data,
+ nf_bridge->neigh_header,
ETH_HLEN-ETH_ALEN);
/* tell br_dev_xmit to continue with forwarding */
nf_bridge->mask |= BRNF_BRIDGED_DNAT;
@@ -392,7 +408,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct iphdr *iph = ip_hdr(skb);
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
int err;
int frag_max_size;
@@ -400,9 +416,9 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
frag_max_size = IPCB(skb)->frag_max_size;
BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
- if (nf_bridge->mask & BRNF_PKT_TYPE) {
+ if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->mask ^= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = false;
}
nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
if (dnat_took_place(skb)) {
@@ -485,20 +501,21 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct
/* Some common code for IPv4/IPv6 */
static struct net_device *setup_pre_routing(struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
- nf_bridge->mask |= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = true;
}
nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
nf_bridge->physindev = skb->dev;
skb->dev = brnf_get_logical_dev(skb, skb->dev);
+
if (skb->protocol == htons(ETH_P_8021Q))
- nf_bridge->mask |= BRNF_8021Q;
+ nf_bridge->orig_proto = BRNF_PROTO_8021Q;
else if (skb->protocol == htons(ETH_P_PPP_SES))
- nf_bridge->mask |= BRNF_PPPoE;
+ nf_bridge->orig_proto = BRNF_PROTO_PPPOE;
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
@@ -680,14 +697,21 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct net_device *in;
if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
+ int frag_max_size;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ frag_max_size = IPCB(skb)->frag_max_size;
+ BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
+ }
+
in = nf_bridge->physindev;
- if (nf_bridge->mask & BRNF_PKT_TYPE) {
+ if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->mask ^= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = false;
}
nf_bridge_update_protocol(skb);
} else {
@@ -722,6 +746,10 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
if (!nf_bridge_unshare(skb))
return NF_DROP;
+ nf_bridge = nf_bridge_info_get(skb);
+ if (!nf_bridge)
+ return NF_DROP;
+
parent = bridge_parent(state->out);
if (!parent)
return NF_DROP;
@@ -735,14 +763,19 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
nf_bridge_pull_encap_header(skb);
- nf_bridge = skb->nf_bridge;
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
- nf_bridge->mask |= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = true;
}
- if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb))
- return NF_DROP;
+ if (pf == NFPROTO_IPV4) {
+ int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size;
+
+ if (br_parse_ip_options(skb))
+ return NF_DROP;
+
+ IPCB(skb)->frag_max_size = frag_max;
+ }
nf_bridge->physoutdev = skb->dev;
if (pf == NFPROTO_IPV4)
@@ -792,30 +825,22 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
}
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
-static bool nf_bridge_copy_header(struct sk_buff *skb)
+static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
{
+ struct brnf_frag_data *data;
int err;
- unsigned int header_size;
- nf_bridge_update_protocol(skb);
- header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
- err = skb_cow_head(skb, header_size);
- if (err)
- return false;
-
- skb_copy_to_linear_data_offset(skb, -header_size,
- skb->nf_bridge->data, header_size);
- __skb_push(skb, nf_bridge_encap_header_len(skb));
- return true;
-}
+ data = this_cpu_ptr(&brnf_frag_data_storage);
+ err = skb_cow_head(skb, data->size);
-static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
-{
- if (!nf_bridge_copy_header(skb)) {
+ if (err) {
kfree_skb(skb);
return 0;
}
+ skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
+ __skb_push(skb, data->encap_size);
+
return br_dev_queue_push_xmit(sk, skb);
}
@@ -833,14 +858,27 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
* boundaries by preserving frag_list rather than refragmenting.
*/
if (skb->len + mtu_reserved > skb->dev->mtu) {
+ struct brnf_frag_data *data;
+
frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
if (br_parse_ip_options(skb))
/* Drop invalid packet */
return NF_DROP;
IPCB(skb)->frag_max_size = frag_max_size;
+
+ nf_bridge_update_protocol(skb);
+
+ data = this_cpu_ptr(&brnf_frag_data_storage);
+ data->encap_size = nf_bridge_encap_header_len(skb);
+ data->size = ETH_HLEN + data->encap_size;
+
+ skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
+ data->size);
+
ret = ip_fragment(sk, skb, br_nf_push_frag_xmit);
- } else
+ } else {
ret = br_dev_queue_push_xmit(sk, skb);
+ }
return ret;
}
@@ -856,7 +894,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct net_device *realoutdev = bridge_parent(skb->dev);
u_int8_t pf;
@@ -882,11 +920,10 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
* about the value of skb->pkt_type. */
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
- nf_bridge->mask |= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = true;
}
nf_bridge_pull_encap_header(skb);
- nf_bridge_save_header(skb);
if (pf == NFPROTO_IPV4)
skb->protocol = htons(ETH_P_IP);
else
@@ -925,13 +962,16 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
*/
static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
skb_pull(skb, ETH_HLEN);
nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
- skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
- skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
+ BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
+
+ skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN),
+ nf_bridge->neigh_header,
+ ETH_HLEN - ETH_ALEN);
skb->dev = nf_bridge->physindev;
br_handle_frame_finish(NULL, skb);
}
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index c5b794da51a9..3262e41ff76f 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -13,6 +13,7 @@
#include <net/dst.h>
#include <net/netfilter/ipv4/nf_reject.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_bridge.h>
#include <net/netfilter/ipv4/nf_reject.h>
const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
@@ -146,7 +147,8 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
*/
if (oldskb->nf_bridge) {
struct ethhdr *oeth = eth_hdr(oldskb);
- nskb->dev = oldskb->nf_bridge->physindev;
+
+ nskb->dev = nf_bridge_get_physindev(oldskb);
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 3afdce03d94e..94b4c6dfb400 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -13,6 +13,7 @@
#include <net/ip6_checksum.h>
#include <net/netfilter/ipv6/nf_reject.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
#include <net/netfilter/ipv6/nf_reject.h>
const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
@@ -195,7 +196,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
*/
if (oldskb->nf_bridge) {
struct ethhdr *oeth = eth_hdr(oldskb);
- nskb->dev = oldskb->nf_bridge->physindev;
+
+ nskb->dev = nf_bridge_get_physindev(oldskb);
nskb->protocol = htons(ETH_P_IPV6);
ip6h->payload_len = htons(sizeof(struct tcphdr));
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 89f73a9e9874..a87d8b8ec730 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -70,7 +70,7 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
# nf_tables
nf_tables-objs += nf_tables_core.o nf_tables_api.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
obj-$(CONFIG_NF_TABLES) += nf_tables.o
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 758b002130d9..380ef5148ea1 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -19,6 +19,7 @@
#include <net/netlink.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
@@ -211,6 +212,22 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next,
#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
#include "ip_set_hash_gen.h"
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+static const char *get_physindev_name(const struct sk_buff *skb)
+{
+ struct net_device *dev = nf_bridge_get_physindev(skb);
+
+ return dev ? dev->name : NULL;
+}
+
+static const char *get_phyoutdev_name(const struct sk_buff *skb)
+{
+ struct net_device *dev = nf_bridge_get_physoutdev(skb);
+
+ return dev ? dev->name : NULL;
+}
+#endif
+
static int
hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -234,16 +251,15 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
e.ip &= ip_set_netmask(e.cidr);
#define IFACE(dir) (par->dir ? par->dir->name : NULL)
-#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL)
#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC)
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ e.iface = SRCDIR ? get_physindev_name(skb) :
+ get_phyoutdev_name(skb);
- if (!nf_bridge)
+ if (!e.iface)
return -EINVAL;
- e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
e.physdev = 1;
#else
e.iface = NULL;
@@ -476,11 +492,11 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
-
- if (!nf_bridge)
+ e.iface = SRCDIR ? get_physindev_name(skb) :
+ get_phyoutdev_name(skb);
+ if (!e.iface)
return -EINVAL;
- e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
+
e.physdev = 1;
#else
e.iface = NULL;
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index 2631876ac55b..a5aa5967b8e1 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -17,6 +17,7 @@
#include <net/route.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
@@ -163,10 +164,10 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
const struct net_device *physindev;
const struct net_device *physoutdev;
- physindev = skb->nf_bridge->physindev;
+ physindev = nf_bridge_get_physindev(skb);
if (physindev && in != physindev)
nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
- physoutdev = skb->nf_bridge->physoutdev;
+ physoutdev = nf_bridge_get_physoutdev(skb);
if (physoutdev && out != physoutdev)
nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
}
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 3f3ac57b2998..2e88032cd5ad 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -10,6 +10,7 @@
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
#include <net/protocol.h>
@@ -58,12 +59,14 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
+ struct net_device *physdev;
- if (nf_bridge->physindev)
- dev_put(nf_bridge->physindev);
- if (nf_bridge->physoutdev)
- dev_put(nf_bridge->physoutdev);
+ physdev = nf_bridge_get_physindev(entry->skb);
+ if (physdev)
+ dev_put(physdev);
+ physdev = nf_bridge_get_physoutdev(entry->skb);
+ if (physdev)
+ dev_put(physdev);
}
#endif
/* Drop reference to owner of hook which queued us. */
@@ -87,13 +90,12 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
struct net_device *physdev;
- physdev = nf_bridge->physindev;
+ physdev = nf_bridge_get_physindev(entry->skb);
if (physdev)
dev_hold(physdev);
- physdev = nf_bridge->physoutdev;
+ physdev = nf_bridge_get_physoutdev(entry->skb);
if (physdev)
dev_hold(physdev);
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5604c2df05d1..0b96fa0d64b2 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2159,7 +2159,7 @@ nft_select_set_ops(const struct nlattr * const nla[],
features = 0;
if (nla[NFTA_SET_FLAGS] != NULL) {
features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
- features &= NFT_SET_INTERVAL | NFT_SET_MAP;
+ features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT;
}
bops = NULL;
@@ -2216,6 +2216,8 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_POLICY] = { .type = NLA_U32 },
[NFTA_SET_DESC] = { .type = NLA_NESTED },
[NFTA_SET_ID] = { .type = NLA_U32 },
+ [NFTA_SET_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2366,6 +2368,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ if (set->timeout &&
+ nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout)))
+ goto nla_put_failure;
+ if (set->gc_int &&
+ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
+ goto nla_put_failure;
+
if (set->policy != NFT_SET_POL_PERFORMANCE) {
if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
goto nla_put_failure;
@@ -2578,7 +2587,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
char name[IFNAMSIZ];
unsigned int size;
bool create;
- u32 ktype, dtype, flags, policy;
+ u64 timeout;
+ u32 ktype, dtype, flags, policy, gc_int;
struct nft_set_desc desc;
int err;
@@ -2605,7 +2615,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_FLAGS] != NULL) {
flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
- NFT_SET_INTERVAL | NFT_SET_MAP))
+ NFT_SET_INTERVAL | NFT_SET_MAP |
+ NFT_SET_TIMEOUT))
return -EINVAL;
}
@@ -2631,6 +2642,19 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
} else if (flags & NFT_SET_MAP)
return -EINVAL;
+ timeout = 0;
+ if (nla[NFTA_SET_TIMEOUT] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT]));
+ }
+ gc_int = 0;
+ if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+ }
+
policy = NFT_SET_POL_PERFORMANCE;
if (nla[NFTA_SET_POLICY] != NULL)
policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
@@ -2699,6 +2723,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
set->flags = flags;
set->size = desc.size;
set->policy = policy;
+ set->timeout = timeout;
+ set->gc_int = gc_int;
err = ops->init(set, &desc, nla);
if (err < 0)
@@ -2785,12 +2811,13 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
return -EBUSY;
- if (set->flags & NFT_SET_MAP) {
+ if (binding->flags & NFT_SET_MAP) {
/* If the set is already bound to the same chain all
* jumps are already validated for that chain.
*/
list_for_each_entry(i, &set->bindings, list) {
- if (i->chain == binding->chain)
+ if (binding->flags & NFT_SET_MAP &&
+ i->chain == binding->chain)
goto bind;
}
@@ -2837,6 +2864,18 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
.len = sizeof(u8),
.align = __alignof__(u8),
},
+ [NFT_SET_EXT_TIMEOUT] = {
+ .len = sizeof(u64),
+ .align = __alignof__(u64),
+ },
+ [NFT_SET_EXT_EXPIRATION] = {
+ .len = sizeof(unsigned long),
+ .align = __alignof__(unsigned long),
+ },
+ [NFT_SET_EXT_USERDATA] = {
+ .len = sizeof(struct nft_userdata),
+ .align = __alignof__(struct nft_userdata),
+ },
};
EXPORT_SYMBOL_GPL(nft_set_ext_types);
@@ -2848,6 +2887,9 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
[NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 },
+ [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -2909,6 +2951,34 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
htonl(*nft_set_ext_flags(ext))))
goto nla_put_failure;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
+ nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
+ cpu_to_be64(*nft_set_ext_timeout(ext))))
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+ unsigned long expires, now = jiffies;
+
+ expires = *nft_set_ext_expiration(ext);
+ if (time_before(now, expires))
+ expires -= now;
+ else
+ expires = 0;
+
+ if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
+ cpu_to_be64(jiffies_to_msecs(expires))))
+ goto nla_put_failure;
+ }
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
+ struct nft_userdata *udata;
+
+ udata = nft_set_ext_userdata(ext);
+ if (nla_put(skb, NFTA_SET_ELEM_USERDATA,
+ udata->len + 1, udata->data))
+ goto nla_put_failure;
+ }
+
nla_nest_end(skb, nest);
return 0;
@@ -3128,11 +3198,11 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
return trans;
}
-static void *nft_set_elem_init(const struct nft_set *set,
- const struct nft_set_ext_tmpl *tmpl,
- const struct nft_data *key,
- const struct nft_data *data,
- gfp_t gfp)
+void *nft_set_elem_init(const struct nft_set *set,
+ const struct nft_set_ext_tmpl *tmpl,
+ const struct nft_data *key,
+ const struct nft_data *data,
+ u64 timeout, gfp_t gfp)
{
struct nft_set_ext *ext;
void *elem;
@@ -3147,6 +3217,11 @@ static void *nft_set_elem_init(const struct nft_set *set,
memcpy(nft_set_ext_key(ext), key, set->klen);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
memcpy(nft_set_ext_data(ext), data, set->dlen);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
+ *nft_set_ext_expiration(ext) =
+ jiffies + msecs_to_jiffies(timeout);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
+ *nft_set_ext_timeout(ext) = timeout;
return elem;
}
@@ -3172,15 +3247,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_ext *ext;
struct nft_set_elem elem;
struct nft_set_binding *binding;
+ struct nft_userdata *udata;
struct nft_data data;
enum nft_registers dreg;
struct nft_trans *trans;
+ u64 timeout;
u32 flags;
+ u8 ulen;
int err;
- if (set->size && set->nelems == set->size)
- return -ENFILE;
-
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
nft_set_elem_policy);
if (err < 0)
@@ -3215,6 +3290,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return -EINVAL;
}
+ timeout = 0;
+ if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT]));
+ } else if (set->flags & NFT_SET_TIMEOUT) {
+ timeout = set->timeout;
+ }
+
err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
goto err1;
@@ -3223,6 +3307,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err2;
nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY);
+ if (timeout > 0) {
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+ if (timeout != set->timeout)
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+ }
if (nla[NFTA_SET_ELEM_DATA] != NULL) {
err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]);
@@ -3241,6 +3330,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
.chain = (struct nft_chain *)binding->chain,
};
+ if (!(binding->flags & NFT_SET_MAP))
+ continue;
+
err = nft_validate_data_load(&bind_ctx, dreg,
&data, d2.type);
if (err < 0)
@@ -3250,20 +3342,38 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA);
}
+ /* The full maximum length of userdata can exceed the maximum
+ * offset value (U8_MAX) for following extensions, therefore it
+ * must be the last extension added.
+ */
+ ulen = 0;
+ if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
+ ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
+ if (ulen > 0)
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
+ ulen);
+ }
+
err = -ENOMEM;
- elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL);
+ elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data,
+ timeout, GFP_KERNEL);
if (elem.priv == NULL)
goto err3;
ext = nft_set_elem_ext(set, elem.priv);
if (flags)
*nft_set_ext_flags(ext) = flags;
+ if (ulen > 0) {
+ udata = nft_set_ext_userdata(ext);
+ udata->len = ulen - 1;
+ nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
+ }
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL)
goto err4;
- ext->genmask = nft_genmask_cur(ctx->net);
+ ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
err = set->ops->insert(set, &elem);
if (err < 0)
goto err5;
@@ -3316,11 +3426,15 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ if (set->size &&
+ !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
+ return -ENFILE;
+
err = nft_add_set_elem(&ctx, set, attr);
- if (err < 0)
+ if (err < 0) {
+ atomic_dec(&set->nelems);
break;
-
- set->nelems++;
+ }
}
return err;
}
@@ -3402,11 +3516,36 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
break;
- set->nelems--;
+ set->ndeact++;
}
return err;
}
+void nft_set_gc_batch_release(struct rcu_head *rcu)
+{
+ struct nft_set_gc_batch *gcb;
+ unsigned int i;
+
+ gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
+ for (i = 0; i < gcb->head.cnt; i++)
+ nft_set_elem_destroy(gcb->head.set, gcb->elems[i]);
+ kfree(gcb);
+}
+EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
+
+struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
+ gfp_t gfp)
+{
+ struct nft_set_gc_batch *gcb;
+
+ gcb = kzalloc(sizeof(*gcb), gfp);
+ if (gcb == NULL)
+ return gcb;
+ gcb->