diff options
author | Jesper Dangaard Brouer <brouer@redhat.com> | 2012-09-26 14:06:59 +0200 |
---|---|---|
committer | Simon Horman <horms@verge.net.au> | 2012-09-28 11:34:24 +0900 |
commit | 2f74713d1436b7d2d0506ba1bc5f10915a73bbec (patch) | |
tree | 7443abafe2fed8dc8018fdb6bbd6a6619c11eb24 /net/netfilter/ipvs/ip_vs_core.c | |
parent | 63dca2c0b0e7a92cb39d1b1ecefa32ffda201975 (diff) |
ipvs: Complete IPv6 fragment handling for IPVS
IPVS now supports fragmented packets, with support from nf_conntrack_reasm.c
Based on patch from: Hans Schillstrom.
IPVS does what conntrack does, i.e. it uses skb->nfct_reasm
(i.e. when all fragments are collected, nf_ct_frag6_output()
starts a "re-play" of all fragments into the interrupted
PREROUTING chain at prio -399 (NF_IP6_PRI_CONNTRACK_DEFRAG+1)
with nfct_reasm pointing to the assembled packet.)
Notice, module nf_defrag_ipv6 must be loaded for this to work.
Report unhandled fragments, and recommend that the user load nf_defrag_ipv6.
To handle fw-mark for fragments, add a new IPVS hook into the prerouting
chain at prio -99 (NF_IP6_PRI_NAT_DST+1) to catch fragments, and copy
fw-mark info from the first packet with an upper layer header.
IPv6 fragment handling should be the last thing on the IPVS IPv6
missing support list.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Diffstat (limited to 'net/netfilter/ipvs/ip_vs_core.c')
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 117 |
1 files changed, 96 insertions, 21 deletions
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 19c08425e137..19b89ff94cd5 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -402,8 +402,12 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, unsigned int flags; *ignored = 1; + + /* + * IPv6 frags, only the first hit here. + */ ip_vs_fill_iph_skb(svc->af, skb, &iph); - pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); + pptr = frag_safe_skb_hp(skb, iph.len, sizeof(_ports), _ports, &iph); if (pptr == NULL) return NULL; @@ -507,8 +511,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, #endif ip_vs_fill_iph_skb(svc->af, skb, &iph); - - pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); + pptr = frag_safe_skb_hp(skb, iph.len, sizeof(_ports), _ports, &iph); if (pptr == NULL) { ip_vs_service_put(svc); return NF_DROP; @@ -654,14 +657,6 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } -#ifdef CONFIG_IP_VS_IPV6 -static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user) -{ - /* TODO IPv6: Find out what to do here for IPv6 */ - return 0; -} -#endif - static int ip_vs_route_me_harder(int af, struct sk_buff *skb) { #ifdef CONFIG_IP_VS_IPV6 @@ -939,8 +934,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, ip_vs_fill_iph_skb(AF_INET6, skb, ipvsh); *related = 1; - - ic = skb_header_pointer(skb, ipvsh->len, sizeof(_icmph), &_icmph); + ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh); if (ic == NULL) return NF_DROP; @@ -955,6 +949,11 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, *related = 0; return NF_ACCEPT; } + /* Fragment header that is before ICMP header tells us that: + * it's not an error message since they can't be fragmented. 
+ */ + if (ipvsh->flags & IP6T_FH_F_FRAG) + return NF_DROP; IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n", ic->icmp6_type, ntohs(icmpv6_id(ic)), @@ -1117,6 +1116,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iph_skb(af, skb, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { + if (!iph.fragoffs && skb_nfct_reasm(skb)) { + struct sk_buff *reasm = skb_nfct_reasm(skb); + /* Save fw mark for coming frags */ + reasm->ipvs_property = 1; + reasm->mark = skb->mark; + } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_out_icmp_v6(skb, &related, @@ -1124,7 +1129,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (related) return verdict; - ip_vs_fill_iph_skb(af, skb, &iph); } } else #endif @@ -1134,7 +1138,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (related) return verdict; - ip_vs_fill_ip4hdr(skb_network_header(skb), &iph); } pd = ip_vs_proto_data_get(net, iph.protocol); @@ -1167,8 +1170,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) pp->protocol == IPPROTO_SCTP)) { __be16 _ports[2], *pptr; - pptr = skb_header_pointer(skb, iph.len, - sizeof(_ports), _ports); + pptr = frag_safe_skb_hp(skb, iph.len, + sizeof(_ports), _ports, &iph); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ if (ip_vs_lookup_real_service(net, af, iph.protocol, @@ -1468,7 +1471,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) *related = 1; - ic = skb_header_pointer(skb, iph->len, sizeof(_icmph), &_icmph); + ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph); if (ic == NULL) return NF_DROP; @@ -1483,6 +1486,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) *related = 0; return NF_ACCEPT; } + /* Fragment header that is before ICMP header tells us that: + * it's not an error message since they can't be fragmented. 
+ */ + if (iph->flags & IP6T_FH_F_FRAG) + return NF_DROP; IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n", ic->icmp6_type, ntohs(icmpv6_id(ic)), @@ -1514,10 +1522,20 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph, "Checking incoming ICMPv6 for"); - /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET6, skb, &ciph, ciph.len, 1); + /* The embedded headers contain source and dest in reverse order + * if not from localhost + */ + cp = pp->conn_in_get(AF_INET6, skb, &ciph, ciph.len, + (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1); + if (!cp) return NF_ACCEPT; + /* VS/TUN, VS/DR and LOCALNODE just let it go */ + if ((hooknum == NF_INET_LOCAL_OUT) && + (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) { + __ip_vs_conn_put(cp); + return NF_ACCEPT; + } /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); @@ -1590,6 +1608,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { + if (!iph.fragoffs && skb_nfct_reasm(skb)) { + struct sk_buff *reasm = skb_nfct_reasm(skb); + /* Save fw mark for coming frags. */ + reasm->ipvs_property = 1; + reasm->mark = skb->mark; + } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); @@ -1614,13 +1638,16 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) pp = pd->pp; /* * Check if the packet belongs to an existing connection entry - * Only sched first IPv6 fragment. 
*/ cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); if (unlikely(!cp) && !iph.fragoffs) { + /* No (second) fragments need to enter here, as nf_defrag_ipv6 + * replayed fragment zero will already have created the cp + */ int v; + /* Schedule and create new connection entry into &cp */ if (!pp->conn_schedule(af, skb, pd, &v, &cp)) return v; } @@ -1629,6 +1656,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, 0, "ip_vs_in: packet continues traversal as normal"); + if (iph.fragoffs && !skb_nfct_reasm(skb)) { + /* Fragment that couldn't be mapped to a conn entry + * and don't have any pointer to a reasm skb + * is missing module nf_defrag_ipv6 + */ + IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment"); + } return NF_ACCEPT; } @@ -1713,6 +1748,38 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 /* + * AF_INET6 fragment handling + * Copy info from first fragment, to the rest of them. + */ +static unsigned int +ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *reasm = skb_nfct_reasm(skb); + struct net *net; + + /* Skip if not a "replay" from nf_ct_frag6_output or first fragment. + * ipvs_property is set when checking first fragment + * in ip_vs_in() and ip_vs_out(). 
+ */ + if (reasm) + IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property); + if (!reasm || !reasm->ipvs_property) + return NF_ACCEPT; + + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + + /* Copy stored fw mark, saved in ip_vs_{in,out} */ + skb->mark = reasm->mark; + + return NF_ACCEPT; +} + +/* * AF_INET6 handler in NF_INET_LOCAL_IN chain * Schedule and forward packets from remote clients */ @@ -1851,6 +1918,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .priority = 100, }, #ifdef CONFIG_IP_VS_IPV6 + /* After mangle & nat fetch 2:nd fragment and following */ + { + .hook = ip_vs_preroute_frag6, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP6_PRI_NAT_DST + 1, + }, /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, |