Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nftables

Pablo Neira Ayuso <pablo@netfilter.org> says: ==================== nftables updates for net-next The following patchset contains nftables updates for your net-next tree, they are: * Add set operation to the meta expression by means of the select_ops() infrastructure, this allows us to set the packet mark among other things. From Arturo Borrero Gonzalez. * Fix wrong format in sscanf in nf_tables_set_alloc_name(), from Daniel Borkmann. * Add new queue expression to nf_tables. This comes with two previous patches to prepare this new feature, one to add mask in nf_tables_core to evaluate the queue verdict appropriately and another to refactor common code with xt_NFQUEUE, from Eric Leblond. * Do not hide nftables from Kconfig if nfnetlink is not enabled, also from Eric Leblond. * Add the reject expression to nf_tables, this adds the missing TCP RST support. It comes with an initial patch to refactor common code with xt_NFQUEUE, again from Eric Leblond. * Remove an unused variable assignment in nf_tables_dump_set(), from Michal Nazarewicz. * Remove the nft_meta_target code, now that Arturo added the set operation to the meta expression, from me. * Add help information for nf_tables to Kconfig, also from me. * Allow dumping all sets by specifying NFPROTO_UNSPEC, a similar feature is available for other nf_tables objects, requested by Arturo, from me. * Expose the table usage counter, so we can know how many chains are using this table without dumping the list of chains, from Tomasz Bursztyka. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2014-01-06 13:29:30 -0500
committer: David S. Miller <davem@davemloft.net> 2014-01-06 13:29:30 -0500
commit: 9aa28f2b71055d5ae17a2e1daee359d4174bb13e (patch)
tree: fbf4e0fd11eb924e0bece74a87f442bc54441b35 /net
parent: 6a8c4796df74045088a916581c736432d08c53c0 (diff)
parent: c9c8e485978a308c8a359140da187d55120f8fee (diff)
14 files changed, 487 insertions, 534 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 40d56073cd19..81c6910cfa92 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -39,23 +39,33 @@ config NF_CONNTRACK_PROC_COMPAT
 config NF_TABLES_IPV4
 	depends on NF_TABLES
 	tristate "IPv4 nf_tables support"
-
-config NFT_REJECT_IPV4
-	depends on NF_TABLES_IPV4
-	tristate "nf_tables IPv4 reject support"
+	help
+	  This option enables the IPv4 support for nf_tables.
 
 config NFT_CHAIN_ROUTE_IPV4
 	depends on NF_TABLES_IPV4
 	tristate "IPv4 nf_tables route chain support"
+	help
+	  This option enables the "route" chain for IPv4 in nf_tables. This
+	  chain type is used to force packet re-routing after mangling header
+	  fields such as the source, destination, type of service and
+	  the packet mark.
 
 config NFT_CHAIN_NAT_IPV4
 	depends on NF_TABLES_IPV4
 	depends on NF_NAT_IPV4 && NFT_NAT
 	tristate "IPv4 nf_tables nat chain support"
+	help
+	  This option enables the "nat" chain for IPv4 in nf_tables. This
+	  chain type is used to perform Network Address Translation (NAT)
+	  packet transformations such as the source, destination address and
+	  source and destination ports.
 
 config NF_TABLES_ARP
 	depends on NF_TABLES
 	tristate "ARP nf_tables support"
+	help
+	  This option enables the ARP support for nf_tables.
 
 config IP_NF_IPTABLES
 	tristate "IP tables support (required for filtering/masq/NAT)"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 19df72b7ba88..c16be9d58420 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -28,7 +28,6 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
 obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
 
 obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
-obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
 obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index b969131ad1c1..5b6e0df4ccff 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -17,10 +17,6 @@
 #include <linux/udp.h>
 #include <linux/icmp.h>
 #include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/route.h>
-#include <net/dst.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_REJECT.h>
@@ -28,128 +24,12 @@
 #include <linux/netfilter_bridge.h>
 #endif
 
+#include <net/netfilter/ipv4/nf_reject.h>
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
 
-/* Send RST reply */
-static void send_reset(struct sk_buff *oldskb, int hook)
-{
-	struct sk_buff *nskb;
-	const struct iphdr *oiph;
-	struct iphdr *niph;
-	const struct tcphdr *oth;
-	struct tcphdr _otcph, *tcph;
-
-	/* IP header checks: fragment. */
-	if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
-		return;
-
-	oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
-				 sizeof(_otcph), &_otcph);
-	if (oth == NULL)
-		return;
-
-	/* No RST for RST. */
-	if (oth->rst)
-		return;
-
-	if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
-		return;
-
-	/* Check checksum */
-	if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
-		return;
-	oiph = ip_hdr(oldskb);
-
-	nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
-			 LL_MAX_HEADER, GFP_ATOMIC);
-	if (!nskb)
-		return;
-
-	skb_reserve(nskb, LL_MAX_HEADER);
-
-	skb_reset_network_header(nskb);
-	niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
-	niph->version	= 4;
-	niph->ihl	= sizeof(struct iphdr) / 4;
-	niph->tos	= 0;
-	niph->id	= 0;
-	niph->frag_off	= htons(IP_DF);
-	niph->protocol	= IPPROTO_TCP;
-	niph->check	= 0;
-	niph->saddr	= oiph->daddr;
-	niph->daddr	= oiph->saddr;
-
-	skb_reset_transport_header(nskb);
-	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
-	memset(tcph, 0, sizeof(*tcph));
-	tcph->source	= oth->dest;
-	tcph->dest	= oth->source;
-	tcph->doff	= sizeof(struct tcphdr) / 4;
-
-	if (oth->ack)
-		tcph->seq = oth->ack_seq;
-	else {
-		tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
-				      oldskb->len - ip_hdrlen(oldskb) -
-				      (oth->doff << 2));
-		tcph->ack = 1;
-	}
-
-	tcph->rst	= 1;
-	tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
-				    niph->daddr, 0);
-	nskb->ip_summed = CHECKSUM_PARTIAL;
-	nskb->csum_start = (unsigned char *)tcph - nskb->head;
-	nskb->csum_offset = offsetof(struct tcphdr, check);
-
-	/* ip_route_me_harder expects skb->dst to be set */
-	skb_dst_set_noref(nskb, skb_dst(oldskb));
-
-	nskb->protocol = htons(ETH_P_IP);
-	if (ip_route_me_harder(nskb, RTN_UNSPEC))
-		goto free_nskb;
-
-	niph->ttl	= ip4_dst_hoplimit(skb_dst(nskb));
-
-	/* "Never happens" */
-	if (nskb->len > dst_mtu(skb_dst(nskb)))
-		goto free_nskb;
-
-	nf_ct_attach(nskb, oldskb);
-
-#ifdef CONFIG_BRIDGE_NETFILTER
-	/* If we use ip_local_out for bridged traffic, the MAC source on
-	 * the RST will be ours, instead of the destination's.  This confuses
-	 * some routers/firewalls, and they drop the packet.  So we need to
-	 * build the eth header using the original destination's MAC as the
-	 * source, and send the RST packet directly.
-	 */
-	if (oldskb->nf_bridge) {
-		struct ethhdr *oeth = eth_hdr(oldskb);
-		nskb->dev = oldskb->nf_bridge->physindev;
-		niph->tot_len = htons(nskb->len);
-		ip_send_check(niph);
-		if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
-				    oeth->h_source, oeth->h_dest, nskb->len) < 0)
-			goto free_nskb;
-		dev_queue_xmit(nskb);
-	} else
-#endif
-		ip_local_out(nskb);
-
-	return;
-
- free_nskb:
-	kfree_skb(nskb);
-}
-
-static inline void send_unreach(struct sk_buff *skb_in, int code)
-{
-	icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
-}
-
 static unsigned int
 reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -157,28 +37,28 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 
 	switch (reject->with) {
 	case IPT_ICMP_NET_UNREACHABLE:
-		send_unreach(skb, ICMP_NET_UNREACH);
+		nf_send_unreach(skb, ICMP_NET_UNREACH);
 		break;
 	case IPT_ICMP_HOST_UNREACHABLE:
-		send_unreach(skb, ICMP_HOST_UNREACH);
+		nf_send_unreach(skb, ICMP_HOST_UNREACH);
 		break;
 	case IPT_ICMP_PROT_UNREACHABLE:
-		send_unreach(skb, ICMP_PROT_UNREACH);
+		nf_send_unreach(skb, ICMP_PROT_UNREACH);
 		break;
 	case IPT_ICMP_PORT_UNREACHABLE:
-		send_unreach(skb, ICMP_PORT_UNREACH);
+		nf_send_unreach(skb, ICMP_PORT_UNREACH);
 		break;
 	case IPT_ICMP_NET_PROHIBITED:
-		send_unreach(skb, ICMP_NET_ANO);
+		nf_send_unreach(skb, ICMP_NET_ANO);
 		break;
 	case IPT_ICMP_HOST_PROHIBITED:
-		send_unreach(skb, ICMP_HOST_ANO);
+		nf_send_unreach(skb, ICMP_HOST_ANO);
 		break;
 	case IPT_ICMP_ADMIN_PROHIBITED:
-		send_unreach(skb, ICMP_PKT_FILTERED);
+		nf_send_unreach(skb, ICMP_PKT_FILTERED);
 		break;
 	case IPT_TCP_RESET:
-		send_reset(skb, par->hooknum);
+		nf_send_reset(skb, par->hooknum);
 	case IPT_ICMP_ECHOREPLY:
 		/* Doesn't happen. */
 		break;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 7702f9e90a04..35750df744dc 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -28,15 +28,27 @@ config NF_CONNTRACK_IPV6
 config NF_TABLES_IPV6
 	depends on NF_TABLES
 	tristate "IPv6 nf_tables support"
+	help
+	  This option enables the IPv6 support for nf_tables.
 
 config NFT_CHAIN_ROUTE_IPV6
 	depends on NF_TABLES_IPV6
 	tristate "IPv6 nf_tables route chain support"
+	help
+	  This option enables the "route" chain for IPv6 in nf_tables. This
+	  chain type is used to force packet re-routing after mangling header
+	  fields such as the source, destination, flowlabel, hop-limit and
+	  the packet mark.
 
 config NFT_CHAIN_NAT_IPV6
 	depends on NF_TABLES_IPV6
 	depends on NF_NAT_IPV6 && NFT_NAT
 	tristate "IPv6 nf_tables nat chain support"
+	help
+	  This option enables the "nat" chain for IPv6 in nf_tables. This
+	  chain type is used to perform Network Address Translation (NAT)
+	  packet transformations such as the source, destination address and
+	  source and destination ports.
 
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index da00a2ecde55..544b0a9da1b5 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -23,181 +23,18 @@
 #include <linux/skbuff.h>
 #include <linux/icmpv6.h>
 #include <linux/netdevice.h>
-#include <net/ipv6.h>
-#include <net/tcp.h>
 #include <net/icmp.h>
-#include <net/ip6_checksum.h>
-#include <net/ip6_fib.h>
-#include <net/ip6_route.h>
 #include <net/flow.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_REJECT.h>
 
+#include <net/netfilter/ipv6/nf_reject.h>
+
 MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
 MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
 MODULE_LICENSE("GPL");
 
-/* Send RST reply */
-static void send_reset(struct net *net, struct sk_buff *oldskb, int hook)
-{
-	struct sk_buff *nskb;
-	struct tcphdr otcph, *tcph;
-	unsigned int otcplen, hh_len;
-	int tcphoff, needs_ack;
-	const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
-	struct ipv6hdr *ip6h;
-#define DEFAULT_TOS_VALUE	0x0U
-	const __u8 tclass = DEFAULT_TOS_VALUE;
-	struct dst_entry *dst = NULL;
-	u8 proto;
-	__be16 frag_off;
-	struct flowi6 fl6;
-
-	if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
-	    (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
-		pr_debug("addr is not unicast.\n");
-		return;
-	}
-
-	proto = oip6h->nexthdr;
-	tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off);
-
-	if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
-		pr_debug("Cannot get TCP header.\n");
-		return;
-	}
-
-	otcplen = oldskb->len - tcphoff;
-
-	/* IP header checks: fragment, too short. */
-	if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
-		pr_debug("proto(%d) != IPPROTO_TCP, "
-			 "or too short. otcplen = %d\n",
-			 proto, otcplen);
-		return;
-	}
-
-	if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
-		BUG();
-
-	/* No RST for RST. */
-	if (otcph.rst) {
-		pr_debug("RST is set\n");
-		return;
-	}
-
-	/* Check checksum. */
-	if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) {
-		pr_debug("TCP checksum is invalid\n");
-		return;
-	}
-
-	memset(&fl6, 0, sizeof(fl6));
-	fl6.flowi6_proto = IPPROTO_TCP;
-	fl6.saddr = oip6h->daddr;
-	fl6.daddr = oip6h->saddr;
-	fl6.fl6_sport = otcph.dest;
-	fl6.fl6_dport = otcph.source;
-	security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
-	dst = ip6_route_output(net, NULL, &fl6);
-	if (dst == NULL || dst->error) {
-		dst_release(dst);
-		return;
-	}
-	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
-	if (IS_ERR(dst))
-		return;
-
-	hh_len = (dst->dev->hard_header_len + 15)&~15;
-	nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
-			 + sizeof(struct tcphdr) + dst->trailer_len,
-			 GFP_ATOMIC);
-
-	if (!nskb) {
-		net_dbg_ratelimited("cannot alloc skb\n");
-		dst_release(dst);
-		return;
-	}
-
-	skb_dst_set(nskb, dst);
-
-	skb_reserve(nskb, hh_len + dst->header_len);
-
-	skb_put(nskb, sizeof(struct ipv6hdr));
-	skb_reset_network_header(nskb);
-	ip6h = ipv6_hdr(nskb);
-	ip6_flow_hdr(ip6h, tclass, 0);
-	ip6h->hop_limit = ip6_dst_hoplimit(dst);
-	ip6h->nexthdr = IPPROTO_TCP;
-	ip6h->saddr = oip6h->daddr;
-	ip6h->daddr = oip6h->saddr;
-
-	skb_reset_transport_header(nskb);
-	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
-	/* Truncate to length (no data) */
-	tcph->doff = sizeof(struct tcphdr)/4;
-	tcph->source = otcph.dest;
-	tcph->dest = otcph.source;
-
-	if (otcph.ack) {
-		needs_ack = 0;
-		tcph->seq = otcph.ack_seq;
-		tcph->ack_seq = 0;
-	} else {
-		needs_ack = 1;
-		tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
-				      + otcplen - (otcph.doff<<2));
-		tcph->seq = 0;
-	}
-
-	/* Reset flags */
-	((u_int8_t *)tcph)[13] = 0;
-	tcph->rst = 1;
-	tcph->ack = needs_ack;
-	tcph->window = 0;
-	tcph->urg_ptr = 0;
-	tcph->check = 0;
-
-	/* Adjust TCP checksum */
-	tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
-				      &ipv6_hdr(nskb)->daddr,
-				      sizeof(struct tcphdr), IPPROTO_TCP,
-				      csum_partial(tcph,
-						   sizeof(struct tcphdr), 0));
-
-	nf_ct_attach(nskb, oldskb);
-
-#ifdef CONFIG_BRIDGE_NETFILTER
-	/* If we use ip6_local_out for bridged traffic, the MAC source on
-	 * the RST will be ours, instead of the destination's.  This confuses
-	 * some routers/firewalls, and they drop the packet.  So we need to
-	 * build the eth header using the original destination's MAC as the
-	 * source, and send the RST packet directly.
-	 */
-	if (oldskb->nf_bridge) {
-		struct ethhdr *oeth = eth_hdr(oldskb);
-		nskb->dev = oldskb->nf_bridge->physindev;
-		nskb->protocol = htons(ETH_P_IPV6);
-		ip6h->payload_len = htons(sizeof(struct tcphdr));
-		if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
-				    oeth->h_source, oeth->h_dest, nskb->len) < 0)
-			return;
-		dev_queue_xmit(nskb);
-	} else
-#endif
-		ip6_local_out(nskb);
-}
-
-static inline void
-send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
-	     unsigned int hooknum)
-{
-	if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
-		skb_in->dev = net->loopback_dev;
-
-	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
-}
 
 static unsigned int
 reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
@@ -208,25 +45,25 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	pr_debug("%s: medium point\n", __func__);
 	switch (reject->with) {
 	case IP6T_ICMP6_NO_ROUTE:
-		send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum);
+		nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
 		break;
 	case IP6T_ICMP6_ADM_PROHIBITED:
-		send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
+		nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
 		break;
 	case IP6T_ICMP6_NOT_NEIGHBOUR:
-		send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
+		nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
 		break;
 	case IP6T_ICMP6_ADDR_UNREACH:
-		send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
+		nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
 		break;
 	case IP6T_ICMP6_PORT_UNREACH:
-		send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
+		nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
 		break;
 	case IP6T_ICMP6_ECHOREPLY:
 		/* Do nothing */
 		break;
 	case IP6T_TCP_RESET:
-		send_reset(net, skb, par->hooknum);
+		nf_send_reset6(net, skb, par->hooknum);
 		break;
 	default:
 		net_info_ratelimited("case %u not handled yet\n", reject->with);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c17902cb5df9..c3b3b26c4c4e 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -414,47 +414,104 @@ config NETFILTER_SYNPROXY
 endif # NF_CONNTRACK
 
 config NF_TABLES
-	depends on NETFILTER_NETLINK
+	select NETFILTER_NETLINK
 	tristate "Netfilter nf_tables support"
+	help
+	  nftables is the new packet classification framework that intends to
+	  replace the existing {ip,ip6,arp,eb}_tables infrastructure. It
+	  provides a pseudo-state machine with an extensible instruction-set
+	  (also known as expressions) that the userspace 'nft' utility
+	  (http://www.netfilter.org/projects/nftables) uses to build the
+	  rule-set. It also comes with the generic set infrastructure that
+	  allows you to construct mappings between matchings and actions
+	  for performance lookups.
+
+	  To compile it as a module, choose M here.
 
 config NFT_EXTHDR
 	depends on NF_TABLES
 	tristate "Netfilter nf_tables IPv6 exthdr module"
+	help
+	  This option adds the "exthdr" expression that you can use to match
+	  IPv6 extension headers.
 
 config NFT_META
 	depends on NF_TABLES
 	tristate "Netfilter nf_tables meta module"
+	help
+	  This option adds the "meta" expression that you can use to match and
+	  to set packet metainformation such as the packet mark.
 
 config NFT_CT
 	depends on NF_TABLES
 	depends on NF_CONNTRACK
 	tristate "Netfilter nf_tables conntrack module"
+	help
+	  This option adds the "ct" expression that you can use to match
+	  connection tracking information such as the flow state.
 
 config NFT_RBTREE
 	depends on NF_TABLES
 	tristate "Netfilter nf_tables rbtree set module"
+	help
+	  This option adds the "rbtree" set type (Red Black tree) that is used
+	  to build interval-based sets.
 
 config NFT_HASH
 	depends on NF_TABLES
 	tristate "Netfilter nf_tables hash set module"
+	help
+	  This option adds the "hash" set type that is used to build one-way
+	  mappings between matchings and actions.
 
 config NFT_COUNTER
 	depends on NF_TABLES
 	tristate "Netfilter nf_tables counter module"
+	help
+	  This option adds the "counter" expression that you can use to
+	  include packet and byte counters in a rule.
 
 config NFT_LOG
 	depends on NF_TABLES
 	tristate "Netfilter nf_tables log module"
+	help
+	  This option adds the "log" expression that you can use to log
+	  packets matching some criteria.
 
 config NFT_LIMIT
 	depends on NF_TABLES
 	tristate "Netfilter nf_tables limit module"
+	help
+	  This option adds the "limit" expression that you can use to
+	  ratelimit rule matchings.
 
 config NFT_NAT
 	depends on NF_TABLES
 	depends on NF_CONNTRACK
 	depends on NF_NAT
 	tristate "Netfilter nf_tables nat module"
+	help
+	  This option adds the "nat" expression that you can use to perform
+	  typical Network Address Translation (NAT) packet transformations.
+
+config NFT_QUEUE
+	depends on NF_TABLES
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_NETLINK_QUEUE
+	tristate "Netfilter nf_tables queue module"
+	help
+	  This is required if you intend to use the userspace queueing
+	  infrastructure (also known as NFQUEUE) from nftables.
+
+config NFT_REJECT
+	depends on NF_TABLES
+	depends on NF_TABLES_IPV6 || !NF_TABLES_IPV6
+	default m if NETFILTER_ADVANCED=n
+	tristate "Netfilter nf_tables reject support"
+	help
+	  This option adds the "reject" expression that you can use to
+	  explicitly deny and notify via TCP reset/ICMP informational errors
+	  unallowed traffic.
 
 config NFT_COMPAT
 	depends on NF_TABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 407fc232f625..78b4e1c9c595 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -76,7 +76,8 @@ obj-$(CONFIG_NFT_META)		+= nft_meta.o
 obj-$(CONFIG_NFT_CT)		+= nft_ct.o
 obj-$(CONFIG_NFT_LIMIT)		+= nft_limit.o
 obj-$(CONFIG_NFT_NAT)		+= nft_nat.o
-#nf_tables-objs			+= nft_meta_target.o
+obj-$(CONFIG_NFT_QUEUE)		+= nft_queue.o
+obj-$(CONFIG_NFT_REJECT) 	+= nft_reject.o
 obj-$(CONFIG_NFT_RBTREE)	+= nft_rbtree.o
 obj-$(CONFIG_NFT_HASH)		+= nft_hash.o
 obj-$(CONFIG_NFT_COUNTER)	+= nft_counter.o
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f93b7d06f4be..629b6da98318 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -180,7 +180,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
 	nfmsg->res_id		= 0;
 
 	if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
-	    nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)))
+	    nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
+	    nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
 		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
@@ -1923,12 +1924,14 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
 {
 	struct net *net = sock_net(skb->sk);
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nft_af_info *afi;
+	const struct nft_af_info *afi = NULL;
 	const struct nft_table *table = NULL;
 
-	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
+	if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
+		afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
+		if (IS_ERR(afi))
+			return PTR_ERR(afi);
+	}
 
 	if (nla[NFTA_SET_TABLE] != NULL) {
 		table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
@@ -1973,11 +1976,14 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
 			return -ENOMEM;
 
 		list_for_each_entry(i, &ctx->table->sets, list) {
-			if (!sscanf(i->name, name, &n))
+			int tmp;
+
+			if (!sscanf(i->name, name, &tmp))
 				continue;
-			if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE)
+			if (tmp < 0 || tmp > BITS_PER_LONG * PAGE_SIZE)
 				continue;
-			set_bit(n, inuse);
+
+			set_bit(tmp, inuse);
 		}
 
 		n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE);
@@ -2094,8 +2100,8 @@ done:
 	return skb->len;
 }
 
-static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
-				   struct netlink_callback *cb)
+static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
+				      struct netlink_callback *cb)
 {
 	const struct nft_set *set;
 	unsigned int idx = 0, s_idx = cb->args[0];
@@ -2127,6 +2133,61 @@ done:
 	return skb->len;
 }
 
+static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	const struct nft_set *set;
+	unsigned int idx, s_idx = cb->args[0];
+	const struct nft_af_info *afi;
+	struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
+	struct net *net = sock_net(skb->sk);
+	int cur_family = cb->args[3];
+
+	if (cb->args[1])
+		return skb->len;
+
+	list_for_each_entry(afi, &net->nft.af_info, list) {
+		if (cur_family) {
+			if (afi->family != cur_family)
+				continue;
+
+			cur_family = 0;
+		}
+
+		list_for_each_entry(table, &afi->tables, list) {
+			if (cur_table) {
+				if (cur_table != table)
+					continue;
+
+				cur_table = NULL;
+			}
+
+			ctx->table = table;
+			ctx->afi = afi;
+			idx = 0;
+			list_for_each_entry(set, &ctx->table->sets, list) {
+				if (idx < s_idx)
+					goto cont;
+				if (nf_tables_fill_set(skb, ctx, set,
+						       NFT_MSG_NEWSET,
+						       NLM_F_MULTI) < 0) {
+					cb->args[0] = idx;
+					cb->args[2] = (unsigned long) table;
+					cb->args[3] = afi->family;
+					goto done;
+				}
+cont:
+				idx++;
+			}
+			if (s_idx)
+				s_idx = 0;
+		}
+	}
+	cb->args[1] = 1;
+done:
+	return skb->len;
+}
+
 static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
@@ -2143,9 +2204,12 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
 	if (err < 0)
 		return err;
 
-	if (ctx.table == NULL)
-		ret = nf_tables_dump_sets_all(&ctx, skb, cb);
-	else
+	if (ctx.table == NULL) {
+		if (ctx.afi == NULL)
+			ret = nf_tables_dump_sets_all(&ctx, skb, cb);
+		else
+			ret = nf_tables_dump_sets_family(&ctx, skb, cb);
+	} else
 		ret = nf_tables_dump_sets_table(&ctx, skb, cb);
 
 	return ret;
@@ -2158,6 +2222,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
 	const struct nft_set *set;
 	struct nft_ctx ctx;
 	struct sk_buff *skb2;
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	int err;
 
 	/* Verify existance before starting dump */
@@ -2172,6 +2237,10 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
 		return netlink_dump_start(nlsk, skb, nlh, &c);
 	}
 
+	/* Only accept unspec with dump */
+	if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
+		return -EAFNOSUPPORT;
+
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
@@ -2341,6 +2410,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
 			    const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[])
 {
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	struct nft_set *set;
 	struct nft_ctx ctx;
 	int err;
@@ -2352,6 +2422,9 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
+	if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
+		return -EAFNOSUPPORT;
+
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
@@ -2521,9 +2594,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	u32 portid, seq;
 	int event, err;
 
-	nfmsg = nlmsg_data(cb->nlh);
-	err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX,
-			  nft_set_elem_list_policy);
+	err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla,
+			  NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy);
 	if (err < 0)
 		return err;
 
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index cb9e685caae1..e8fcc343c2b9 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -164,7 +164,7 @@ next_rule:
 		break;
 	}
 
-	switch (data[NFT_REG_VERDICT].verdict) {
+	switch (data[NFT_REG_VERDICT].verdict & NF_VERDICT_MASK) {
 	case NF_ACCEPT:
 	case NF_DROP:
 	case NF_QUEUE:
@@ -172,6 +172,9 @@ next_rule:
 			nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
 
 		return data[NFT_REG_VERDICT].verdict;
+	}
+
+	switch (data[NFT_REG_VERDICT].verdict) {
 	case NFT_JUMP:
 		if (unlikely(pkt->skb->nf_trace))
 			nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 8c28220a90b3..1ceaaa6dfe72 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -21,12 +21,15 @@
 
 struct nft_meta {
 	enum nft_meta_keys	key:8;
-	enum nft_registers	dreg:8;
+	union {
+		enum nft_registers	dreg:8;
+		enum nft_registers	sreg:8;
+	};
 };
 
-static void nft_meta_eval(const struct nft_expr *expr,
-			  struct nft_data data[NFT_REG_MAX + 1],
-			  const struct nft_pktinfo *pkt)
+static void nft_meta_get_eval(const struct nft_expr *expr,
+			      struct nft_data data[NFT_REG_MAX + 1],
+			      const struct nft_pktinfo *pkt)
 {
 	const struct nft_meta *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
@@ -132,23 +135,50 @@ err:
 	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
 }
 
+static void nft_meta_set_eval(const struct nft_expr *expr,
+			      struct nft_data data[NFT_REG_MAX + 1],
+			      const struct nft_pktinfo *pkt)
+{
+	const struct nft_meta *meta = nft_expr_priv(expr);
+	struct sk_buff *skb = pkt->skb;
+	u32 value = data[meta->sreg].data[0];
+
+	switch (meta->key) {
+	case NFT_META_MARK:
+		skb->mark = value;
+		break;
+	case NFT_META_PRIORITY:
+		skb->priority = value;
+		break;
+	case NFT_META_NFTRACE:
+		skb->nf_trace = 1;
+		break;
+	default:
+		WARN_ON(1);
+	}
+}
+
 static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
 	[NFTA_META_DREG]	= { .type = NLA_U32 },
 	[NFTA_META_KEY]		= { .type = NLA_U32 },
+	[NFTA_META_SREG]	= { .type = NLA_U32 },
 };
 
-static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
-			 const struct nlattr * const tb[])
+static int nft_meta_init_validate_set(uint32_t key)
 {
-	struct nft_meta *priv = nft_expr_priv(expr);
-	int err;
-
-	if (tb[NFTA_META_DREG] == NULL ||
-	    tb[NFTA_META_KEY] == NULL)
-		return -EINVAL;
+	switch (key) {
+	case NFT_META_MARK:
+	case NFT_META_PRIORITY:
+	case NFT_META_NFTRACE:
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
 
-	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
-	switch (priv->key) {
+static int nft_meta_init_validate_get(uint32_t key)
+{
+	switch (key) {
 	case NFT_META_LEN:
 	case NFT_META_PROTOCOL:
 	case NFT_META_PRIORITY:
@@ -167,26 +197,69 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 #ifdef CONFIG_NETWORK_SECMARK
 	case NFT_META_SECMARK:
 #endif
-		break;
+		return 0;
 	default:
 		return -EOPNOTSUPP;
 	}
 
-	priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
-	err = nft_validate_output_register(priv->dreg);
+}
+
+static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
+{
+	struct nft_meta *priv = nft_expr_priv(expr);
+	int err;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+
+	if (tb[NFTA_META_DREG]) {
+		err = nft_meta_init_validate_get(priv->key);
+		if (err < 0)
+			return err;
+
+		priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+		err = nft_validate_output_register(priv->dreg);
+		if (err < 0)
+			return err;
+
+		return nft_validate_data_load(ctx, priv->dreg, NULL,
+					      NFT_DATA_VALUE);
+	}
+
+	err = nft_meta_init_validate_set(priv->key);
 	if (err < 0)
 		return err;
-	return nft_validate_data_load(ctx, priv->dr
author	David S. Miller <davem@davemloft.net>	2014-01-06 13:29:30 -0500
committer	David S. Miller <davem@davemloft.net>	2014-01-06 13:29:30 -0500
commit	9aa28f2b71055d5ae17a2e1daee359d4174bb13e (patch)
tree	fbf4e0fd11eb924e0bece74a87f442bc54441b35 /net
parent	6a8c4796df74045088a916581c736432d08c53c0 (diff)
parent	c9c8e485978a308c8a359140da187d55120f8fee (diff)