summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2015-06-15 14:30:32 -0700
committer: David S. Miller <davem@davemloft.net> 2015-06-15 14:30:32 -0700
commit: ada6c1de9ecabcfc5619479bcd29a208f2e248a0 (patch)
tree: 1bde0136c3d5f67942c8f2212dd960aa64d993e0
parent: 758f0d4b16e0508aa47a4a4d06eacba0d66e24d5 (diff)
parent: 835b803377f5f11f9ccf234f70ed667a82605c45 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

This is a bit large (and late) patchset that contains Netfilter updates for net-next. Most relevantly br_netfilter fixes, ipset RCU support, removal of x_tables percpu ruleset copy and rework of the nf_tables netdev support. More specifically, they are:

1) Warn the user when there is a better protocol conntracker available, from Marcelo Ricardo Leitner.

2) Fix forwarding of IPv6 fragmented traffic in br_netfilter, from Bernhard Thaler. This comes with several patches to prepare the change in the first place.

3) Get rid of special mtu handling of PPPoE/VLAN frames for br_netfilter. This is not needed anymore since now we use the largest fragment size to refragment, from Florian Westphal.

4) Restore vlan tag when refragmenting in br_netfilter, also from Florian.

5) Get rid of the percpu ruleset copy in x_tables, from Florian. Plus another follow-up patch to refine it from Eric Dumazet.

6) Several ipset cleanups, fixes and finally RCU support, from Jozsef Kadlecsik.

7) Get rid of parens in Netfilter Kconfig files.

8) Attach the net_device to the basechain as opposed to the initial per table approach in the nf_tables netdev family.

9) Subscribe to netdev events to detect the removal and registration of a device that is referenced by a basechain.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/netfilter/ipset/ip_set.h29
-rw-r--r--include/linux/netfilter/ipset/ip_set_comment.h38
-rw-r--r--include/linux/netfilter/ipset/ip_set_timeout.h27
-rw-r--r--include/linux/netfilter/x_tables.h56
-rw-r--r--include/linux/netfilter_bridge.h7
-rw-r--r--include/linux/netfilter_ipv6.h3
-rw-r--r--include/linux/skbuff.h7
-rw-r--r--include/net/netfilter/nf_tables.h11
-rw-r--r--include/uapi/linux/netfilter/ipset/ip_set.h6
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h4
-rw-r--r--net/bridge/br_netfilter.c398
-rw-r--r--net/bridge/br_private.h7
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/netfilter/Kconfig3
-rw-r--r--net/ipv4/netfilter/arp_tables.c86
-rw-r--r--net/ipv4/netfilter/ip_tables.c95
-rw-r--r--net/ipv6/netfilter.c2
-rw-r--r--net/ipv6/netfilter/Kconfig3
-rw-r--r--net/ipv6/netfilter/ip6_tables.c96
-rw-r--r--net/netfilter/Kconfig18
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h44
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c27
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c46
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c24
-rw-r--r--net/netfilter/ipset/ip_set_core.c344
-rw-r--r--net/netfilter/ipset/ip_set_getport.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h714
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c39
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c46
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c51
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c53
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c58
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c19
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c49
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c225
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c120
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c52
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c128
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c419
-rw-r--r--net/netfilter/ipset/pfxlen.c16
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c8
-rw-r--r--net/netfilter/nf_tables_api.c139
-rw-r--r--net/netfilter/nf_tables_netdev.c75
-rw-r--r--net/netfilter/x_tables.c37
-rw-r--r--net/netfilter/xt_set.c44
45 files changed, 1972 insertions, 1718 deletions
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index ffdfdc24952a..48bb01edcf30 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -108,8 +108,13 @@ struct ip_set_counter {
atomic64_t packets;
};
+struct ip_set_comment_rcu {
+ struct rcu_head rcu;
+ char str[0];
+};
+
struct ip_set_comment {
- char *str;
+ struct ip_set_comment_rcu __rcu *c;
};
struct ip_set_skbinfo {
@@ -176,6 +181,9 @@ struct ip_set_type_variant {
/* List elements */
int (*list)(const struct ip_set *set, struct sk_buff *skb,
struct netlink_callback *cb);
+ /* Keep listing private when resizing runs parallel */
+ void (*uref)(struct ip_set *set, struct netlink_callback *cb,
+ bool start);
/* Return true if "b" set is the same as "a"
* according to the create set parameters */
@@ -223,7 +231,7 @@ struct ip_set {
/* The name of the set */
char name[IPSET_MAXNAMELEN];
/* Lock protecting the set data */
- rwlock_t lock;
+ spinlock_t lock;
/* References to the set */
u32 ref;
/* The core set type */
@@ -341,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
cpu_to_be64((u64)skbinfo->skbmark << 32 |
skbinfo->skbmarkmask))) ||
(skbinfo->skbprio &&
- nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+ nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
cpu_to_be32(skbinfo->skbprio))) ||
(skbinfo->skbqueue &&
- nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+ nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
cpu_to_be16(skbinfo->skbqueue)));
-
}
static inline void
@@ -380,12 +387,12 @@ ip_set_init_counter(struct ip_set_counter *counter,
/* Netlink CB args */
enum {
- IPSET_CB_NET = 0,
- IPSET_CB_DUMP,
- IPSET_CB_INDEX,
- IPSET_CB_ARG0,
+ IPSET_CB_NET = 0, /* net namespace */
+ IPSET_CB_DUMP, /* dump single set/all sets */
+ IPSET_CB_INDEX, /* set index */
+ IPSET_CB_PRIVATE, /* set private data */
+ IPSET_CB_ARG0, /* type specific */
IPSET_CB_ARG1,
- IPSET_CB_ARG2,
};
/* register and unregister set references */
@@ -545,8 +552,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \
.timeout = (set)->timeout }
-#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))
-
#define IPSET_CONCAT(a, b) a##b
#define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b)
diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
index 21217ea008d7..8d0248525957 100644
--- a/include/linux/netfilter/ipset/ip_set_comment.h
+++ b/include/linux/netfilter/ipset/ip_set_comment.h
@@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
return nla_data(tb);
}
+/* Called from uadd only, protected by the set spinlock.
+ * The kadt functions don't use the comment extensions in any way.
+ */
static inline void
ip_set_init_comment(struct ip_set_comment *comment,
const struct ip_set_ext *ext)
{
+ struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
size_t len = ext->comment ? strlen(ext->comment) : 0;
- if (unlikely(comment->str)) {
- kfree(comment->str);
- comment->str = NULL;
+ if (unlikely(c)) {
+ kfree_rcu(c, rcu);
+ rcu_assign_pointer(comment->c, NULL);
}
if (!len)
return;
if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
len = IPSET_MAX_COMMENT_SIZE;
- comment->str = kzalloc(len + 1, GFP_ATOMIC);
- if (unlikely(!comment->str))
+ c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+ if (unlikely(!c))
return;
- strlcpy(comment->str, ext->comment, len + 1);
+ strlcpy(c->str, ext->comment, len + 1);
+ rcu_assign_pointer(comment->c, c);
}
+/* Used only when dumping a set, protected by rcu_read_lock_bh() */
static inline int
ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
{
- if (!comment->str)
+ struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
+
+ if (!c)
return 0;
- return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
+ return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
}
+/* Called from uadd/udel, flush or the garbage collectors protected
+ * by the set spinlock.
+ * Called when the set is destroyed and when there can't be any user
+ * of the set data anymore.
+ */
static inline void
ip_set_comment_free(struct ip_set_comment *comment)
{
- if (unlikely(!comment->str))
+ struct ip_set_comment_rcu *c;
+
+ c = rcu_dereference_protected(comment->c, 1);
+ if (unlikely(!c))
return;
- kfree(comment->str);
- comment->str = NULL;
+ kfree_rcu(c, rcu);
+ rcu_assign_pointer(comment->c, NULL);
}
#endif
diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 83c2f9e0886c..1d6a935c1ac5 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -40,38 +40,33 @@ ip_set_timeout_uget(struct nlattr *tb)
}
static inline bool
-ip_set_timeout_test(unsigned long timeout)
+ip_set_timeout_expired(unsigned long *t)
{
- return timeout == IPSET_ELEM_PERMANENT ||
- time_is_after_jiffies(timeout);
-}
-
-static inline bool
-ip_set_timeout_expired(unsigned long *timeout)
-{
- return *timeout != IPSET_ELEM_PERMANENT &&
- time_is_before_jiffies(*timeout);
+ return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
}
static inline void
-ip_set_timeout_set(unsigned long *timeout, u32 t)
+ip_set_timeout_set(unsigned long *timeout, u32 value)
{
- if (!t) {
+ unsigned long t;
+
+ if (!value) {
*timeout = IPSET_ELEM_PERMANENT;
return;
}
- *timeout = msecs_to_jiffies(t * 1000) + jiffies;
- if (*timeout == IPSET_ELEM_PERMANENT)
+ t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
+ if (t == IPSET_ELEM_PERMANENT)
/* Bingo! :-) */
- (*timeout)--;
+ t--;
+ *timeout = t;
}
static inline u32
ip_set_timeout_get(unsigned long *timeout)
{
return *timeout == IPSET_ELEM_PERMANENT ? 0 :
- jiffies_to_msecs(*timeout - jiffies)/1000;
+ jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
}
#endif /* __KERNEL__ */
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 09f38206c18f..95693c4cebdd 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -224,13 +224,10 @@ struct xt_table_info {
unsigned int stacksize;
unsigned int __percpu *stackptr;
void ***jumpstack;
- /* ipt_entry tables: one per CPU */
- /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
- void *entries[1];
+
+ unsigned char entries[0] __aligned(8);
};
-#define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
- + nr_cpu_ids * sizeof(char *))
int xt_register_target(struct xt_target *target);
void xt_unregister_target(struct xt_target *target);
int xt_register_targets(struct xt_target *target, unsigned int n);
@@ -353,6 +350,55 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
return ret;
}
+
+/* On SMP, ip(6)t_entry->counters.pcnt holds address of the
+ * real (percpu) counter. On !SMP, its just the packet count,
+ * so nothing needs to be done there.
+ *
+ * xt_percpu_counter_alloc returns the address of the percpu
+ * counter, or 0 on !SMP.
+ *
+ * Hence caller must use IS_ERR_VALUE to check for error, this
+ * allows us to return 0 for single core systems without forcing
+ * callers to deal with SMP vs. NONSMP issues.
+ */
+static inline u64 xt_percpu_counter_alloc(void)
+{
+ if (nr_cpu_ids > 1) {
+ void __percpu *res = alloc_percpu(struct xt_counters);
+
+ if (res == NULL)
+ return (u64) -ENOMEM;
+
+ return (__force u64) res;
+ }
+
+ return 0;
+}
+static inline void xt_percpu_counter_free(u64 pcnt)
+{
+ if (nr_cpu_ids > 1)
+ free_percpu((void __percpu *) pcnt);
+}
+
+static inline struct xt_counters *
+xt_get_this_cpu_counter(struct xt_counters *cnt)
+{
+ if (nr_cpu_ids > 1)
+ return this_cpu_ptr((void __percpu *) cnt->pcnt);
+
+ return cnt;
+}
+
+static inline struct xt_counters *
+xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
+{
+ if (nr_cpu_ids > 1)
+ return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu);
+
+ return cnt;
+}
+
struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index f2fdb5a52070..6d80fc686323 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -20,13 +20,6 @@ enum nf_br_hook_priorities {
#define BRNF_BRIDGED_DNAT 0x02
#define BRNF_NF_BRIDGE_PREROUTING 0x08
-static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
-{
- if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
- return PPPOE_SES_HLEN;
- return 0;
-}
-
int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
static inline void br_drop_fake_rtable(struct sk_buff *skb)
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 64dad1cc1a4b..8b7d28f3aada 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -25,6 +25,9 @@ void ipv6_netfilter_fini(void);
struct nf_ipv6_ops {
int (*chk_addr)(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict);
+ void (*route_input)(struct sk_buff *skb);
+ int (*fragment)(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *));
};
extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a7acc92aa668..d6cdd6e87d53 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -36,6 +36,7 @@
#include <linux/sched.h>
#include <net/flow_dissector.h>
#include <linux/splice.h>
+#include <linux/in6.h>
/* A. Checksumming of received packets by device.
*
@@ -173,13 +174,17 @@ struct nf_bridge_info {
BRNF_PROTO_PPPOE
} orig_proto:8;
bool pkt_otherhost;
+ __u16 frag_max_size;
unsigned int mask;
struct net_device *physindev;
union {
struct net_device *physoutdev;
char neigh_header[8];
};
- __be32 ipv4_daddr;
+ union {
+ __be32 ipv4_daddr;
+ struct in6_addr ipv6_daddr;
+ };
};
#endif
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 3d6f48ca40a7..2a246680a6c3 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -781,6 +781,7 @@ struct nft_stats {
};
#define NFT_HOOK_OPS_MAX 2
+#define NFT_BASECHAIN_DISABLED (1 << 0)
/**
* struct nft_base_chain - nf_tables base chain
@@ -791,14 +792,17 @@ struct nft_stats {
* @policy: default policy
* @stats: per-cpu chain stats
* @chain: the chain
+ * @dev_name: device name that this base chain is attached to (if any)
*/
struct nft_base_chain {
struct nf_hook_ops ops[NFT_HOOK_OPS_MAX];
possible_net_t pnet;
const struct nf_chain_type *type;
u8 policy;
+ u8 flags;
struct nft_stats __percpu *stats;
struct nft_chain chain;
+ char dev_name[IFNAMSIZ];
};
static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
@@ -806,6 +810,11 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai
return container_of(chain, struct nft_base_chain, chain);
}
+int nft_register_basechain(struct nft_base_chain *basechain,
+ unsigned int hook_nops);
+void nft_unregister_basechain(struct nft_base_chain *basechain,
+ unsigned int hook_nops);
+
unsigned int nft_do_chain(struct nft_pktinfo *pkt,
const struct nf_hook_ops *ops);
@@ -819,7 +828,6 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt,
* @use: number of chain references to this table
* @flags: table flag (see enum nft_table_flags)
* @name: name of the table
- * @dev: this table is bound to this device (if any)
*/
struct nft_table {
struct list_head list;
@@ -829,7 +837,6 @@ struct nft_table {
u32 use;
u16 flags;
char name[NFT_TABLE_MAXNAMELEN];
- struct net_device *dev;
};
enum nft_af_flags {
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 5ab4e60894cf..63b2e34f1b60 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -15,12 +15,12 @@
/* The protocol version */
#define IPSET_PROTOCOL 6
-/* The maximum permissible comment length we will accept over netlink */
-#define IPSET_MAX_COMMENT_SIZE 255
-
/* The max length of strings including NUL: set and type identifiers */
#define IPSET_MAXNAMELEN 32
+/* The maximum permissible comment length we will accept over netlink */
+#define IPSET_MAX_COMMENT_SIZE 255
+
/* Message types and commands */
enum ipset_cmd {
IPSET_CMD_NONE,
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 89a671e0f5e7..a99e6a997140 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -122,11 +122,13 @@ enum nft_list_attributes {
*
* @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32)
* @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
+ * @NFTA_HOOK_DEV: netdevice name (NLA_STRING)
*/
enum nft_hook_attributes {
NFTA_HOOK_UNSPEC,
NFTA_HOOK_HOOKNUM,
NFTA_HOOK_PRIORITY,
+ NFTA_HOOK_DEV,
__NFTA_HOOK_MAX
};
#define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1)
@@ -146,14 +148,12 @@ enum nft_table_flags {
* @NFTA_TABLE_NAME: name of the table (NLA_STRING)
* @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
* @NFTA_TABLE_USE: number of chains in this table (NLA_U32)
- * @NFTA_TABLE_DEV: net device name (NLA_STRING)
*/
enum nft_table_attributes {
NFTA_TABLE_UNSPEC,
NFTA_TABLE_NAME,
NFTA_TABLE_FLAGS,
NFTA_TABLE_USE,
- NFTA_TABLE_DEV,
__NFTA_TABLE_MAX
};
#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 46660a28feef..e4e5f2f29173 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -34,6 +34,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/addrconf.h>
#include <net/route.h>
#include <net/netfilter/br_netfilter.h>
@@ -115,6 +116,8 @@ struct brnf_frag_data {
char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
u8 encap_size;
u8 size;
+ u16 vlan_tci;
+ __be16 vlan_proto;
};
static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
@@ -216,7 +219,7 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
* expected format
*/
-static int br_parse_ip_options(struct sk_buff *skb)
+static int br_validate_ipv4(struct sk_buff *skb)
{
const struct iphdr *iph;
struct net_device *dev = skb->dev;
@@ -264,6 +267,111 @@ drop:
return -1;
}
+/* We only check the length. A bridge shouldn't do any hop-by-hop stuff
+ * anyway
+ */
+static int check_hbh_len(struct sk_buff *skb)
+{
+ unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
+ u32 pkt_len;
+ const unsigned char *nh = skb_network_header(skb);
+ int off = raw - nh;
+ int len = (raw[1] + 1) << 3;
+
+ if ((raw + len) - skb->data > skb_headlen(skb))
+ goto bad;
+
+ off += 2;
+ len -= 2;
+
+ while (len > 0) {
+ int optlen = nh[off + 1] + 2;
+
+ switch (nh[off]) {
+ case IPV6_TLV_PAD1:
+ optlen = 1;
+ break;
+
+ case IPV6_TLV_PADN:
+ break;
+
+ case IPV6_TLV_JUMBO:
+ if (nh[off + 1] != 4 || (off & 3) != 2)
+ goto bad;
+ pkt_len = ntohl(*(__be32 *)(nh + off + 2));
+ if (pkt_len <= IPV6_MAXPLEN ||
+ ipv6_hdr(skb)->payload_len)
+ goto bad;
+ if (pkt_len > skb->len - sizeof(struct ipv6hdr))
+ goto bad;
+ if (pskb_trim_rcsum(skb,
+ pkt_len + sizeof(struct ipv6hdr)))
+ goto bad;
+ nh = skb_network_header(skb);
+ break;
+ default:
+ if (optlen > len)
+ goto bad;
+ break;
+ }
+ off += optlen;
+ len -= optlen;
+ }
+ if (len == 0)
+ return 0;
+bad:
+ return -1;
+}
+
+/* Equivalent to br_validate_ipv4 for IPv6 */
+static int br_validate_ipv6(struct sk_buff *skb)
+{
+ const struct ipv6hdr *hdr;
+ struct net_device *dev = skb->dev;
+ struct inet6_dev *idev = in6_dev_get(skb->dev);
+ u32 pkt_len;
+ u8 ip6h_len = sizeof(struct ipv6hdr);
+
+ if (!pskb_may_pull(skb, ip6h_len))
+ goto inhdr_error;
+
+ if (skb->len < ip6h_len)
+ goto drop;
+
+ hdr = ipv6_hdr(skb);
+
+ if (hdr->version != 6)
+ goto inhdr_error;
+
+ pkt_len = ntohs(hdr->payload_len);
+
+ if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
+ if (pkt_len + ip6h_len > skb->len) {
+ IP6_INC_STATS_BH(dev_net(dev), idev,
+ IPSTATS_MIB_INTRUNCATEDPKTS);
+ goto drop;
+ }
+ if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
+ IP6_INC_STATS_BH(dev_net(dev), idev,
+ IPSTATS_MIB_INDISCARDS);
+ goto drop;
+ }
+ }
+ if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
+ goto drop;
+
+ memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
+ /* No IP options in IPv6 header; however it should be
+ * checked if some next headers need special treatment
+ */
+ return 0;
+
+inhdr_error:
+ IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS);
+drop:
+ return -1;
+}
+
static void nf_bridge_update_protocol(struct sk_buff *skb)
{
switch (skb->nf_bridge->orig_proto) {
@@ -278,37 +386,6 @@ static void nf_bridge_update_protocol(struct sk_buff *skb)
}
}
-/* PF_BRIDGE/PRE_ROUTING *********************************************/
-/* Undo the changes made for ip6tables PREROUTING and continue the
- * bridge PRE_ROUTING hook. */
-static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
-{
- struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
- struct rtable *rt;
-
- if (nf_bridge->pkt_otherhost) {
- skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->pkt_otherhost = false;
- }
- nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
-
- rt = bridge_parent_rtable(nf_bridge->physindev);
- if (!rt) {
- kfree_skb(skb);
- return 0;
- }
- skb_dst_set_noref(skb, &rt->dst);
-
- skb->dev = nf_bridge->physindev;
- nf_bridge_update_protocol(skb);
- nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
- skb->dev, NULL,
- br_handle_frame_finish, 1);
-
- return 0;
-}
-
/* Obtain the correct destination MAC address, while preserving the original
* source MAC address. If we already know this address, we just copy it. If we
* don't, we use the neighbour framework to find out. In both cases, we make
@@ -357,7 +434,74 @@ free_skb:
static bool daddr_was_changed(const struct sk_buff *skb,
const struct nf_bridge_info *nf_bridge)
{
- return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
+ case htons(ETH_P_IPV6):
+ return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr,
+ sizeof(ipv6_hdr(skb)->daddr)) != 0;
+ default:
+ return false;
+ }
+}
+
+/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables
+ * PREROUTING and continue the bridge PRE_ROUTING hook. See comment
+ * for br_nf_pre_routing_finish(), same logic is used here but
+ * equivalent IPv6 function ip6_route_input() called indirectly.
+ */
+static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
+{
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ struct rtable *rt;
+ struct net_device *dev = skb->dev;
+ const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
+
+ nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
+
+ if (nf_bridge->pkt_otherhost) {
+ skb->pkt_type = PACKET_OTHERHOST;
+ nf_bridge->pkt_otherhost = false;
+ }
+ nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
+ if (daddr_was_changed(skb, nf_bridge)) {
+ skb_dst_drop(skb);
+ v6ops->route_input(skb);
+
+ if (skb_dst(skb)->error) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ if (skb_dst(skb)->dev == dev) {
+ skb->dev = nf_bridge->physindev;
+ nf_bridge_update_protocol(skb);
+ nf_bridge_push_encap_header(skb);
+ NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
+ sk, skb, skb->dev, NULL,
+ br_nf_pre_routing_finish_bridge,
+ 1);
+ return 0;
+ }
+ ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
+ skb->pkt_type = PACKET_HOST;
+ } else {
+ rt = bridge_parent_rtable(nf_bridge->physindev);
+ if (!rt) {
+ kfree_skb(skb);
+ return 0;
+ }
+ skb_dst_set_noref(skb, &rt->dst);
+ }
+
+ skb->dev = nf_bridge->physindev;
+ nf_bridge_update_protocol(skb);
+ nf_bridge_push_encap_header(skb);
+ NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
+ skb->dev, NULL,
+ br_handle_frame_finish, 1);
+
+ return 0;
}
/* This requires some explaining. If DNAT has taken place,
@@ -406,16 +550,14 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
int err;
- int frag_max_size;
- frag_max_size = IPCB(skb)->frag_max_size;
- BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
+ nf_bridge-&g