author     Linus Torvalds <torvalds@linux-foundation.org>   2018-08-19 11:51:45 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-08-19 11:51:45 -0700
commit     2ad0d52699700a91660a406a4046017a2d7f246a (patch)
tree       eda80942d8671e60ef2d991fc68816784a41135c /net
parent     e61cf2e3a5b452cfefcb145021f5a8ea88735cc1 (diff)
parent     e2948e5af8eeb6c945000772b7613b0323a0a203 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:

 1) Fix races in IPVS, from Tan Hu.

 2) Missing unbind in matchall classifier, from Hangbin Liu.

 3) Missing act_ife action release, from Vlad Buslov.

 4) Cure lockdep splats in ila, from Cong Wang.

 5) veth queue leak on link delete, from Toshiaki Makita.

 6) Disable isdn's IIOCDBGVAR ioctl, it exposes kernel addresses. From
    Kees Cook.

 7) RCU usage fixup in XDP, from Tariq Toukan.

 8) Two TCP ULP fixes from Daniel Borkmann.

 9) r8169 needs REALTEK_PHY as a Kconfig dependency, from Heiner
    Kallweit.

10) Always take tcf_lock with BH disabled, otherwise we can deadlock
    with rate estimator code paths. From Vlad Buslov.

11) Don't use MSI-X on RTL8106e r8169 chips, they don't resume
    properly. From Jian-Hong Pan.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (41 commits)
  ip6_vti: fix creating fallback tunnel device for vti6
  ip_vti: fix a null pointer deferrence when create vti fallback tunnel
  r8169: don't use MSI-X on RTL8106e
  net: lan743x_ptp: convert to ktime_get_clocktai_ts64
  net: sched: always disable bh when taking tcf_lock
  ip6_vti: simplify stats handling in vti6_xmit
  bpf: fix redirect to map under tail calls
  r8169: add missing Kconfig dependency
  tools/bpf: fix bpf selftest test_cgroup_storage failure
  bpf, sockmap: fix sock_map_ctx_update_elem race with exist/noexist
  bpf, sockmap: fix map elem deletion race with smap_stop_sock
  bpf, sockmap: fix leakage of smap_psock_map_entry
  tcp, ulp: fix leftover icsk_ulp_ops preventing sock from reattach
  tcp, ulp: add alias for all ulp modules
  bpf: fix a rcu usage warning in bpf_prog_array_copy_core()
  samples/bpf: all XDP samples should unload xdp/bpf prog on SIGTERM
  net/xdp: Fix suspicious RCU usage warning
  net/mlx5e: Delete unneeded function argument
  Documentation: networking: ti-cpsw: correct cbs parameters for Eth1 100Mb
  isdn: Disable IIOCDBGVAR
  ...
Diffstat (limited to 'net')
-rw-r--r--  net/core/filter.c                     |  68
-rw-r--r--  net/core/xdp.c                        |  14
-rw-r--r--  net/ipv4/ip_vti.c                     |   3
-rw-r--r--  net/ipv4/tcp_ulp.c                    |   4
-rw-r--r--  net/ipv6/ip6_vti.c                    |  16
-rw-r--r--  net/ipv6/netfilter/ip6t_rpfilter.c    |  12
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c       |  22
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c       |  15
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c  |  26
-rw-r--r--  net/netfilter/nf_conntrack_proto.c    |  15
-rw-r--r--  net/netfilter/nf_tables_api.c         |  38
-rw-r--r--  net/netfilter/nfnetlink_acct.c        |  29
-rw-r--r--  net/netfilter/nft_chain_filter.c      |  14
-rw-r--r--  net/netfilter/nft_ct.c                |   7
-rw-r--r--  net/netfilter/nft_dynset.c            |   2
-rw-r--r--  net/netfilter/nft_set_bitmap.c        |   6
-rw-r--r--  net/netfilter/nft_set_hash.c          |   8
-rw-r--r--  net/netfilter/nft_set_rbtree.c        |   4
-rw-r--r--  net/netfilter/nft_tproxy.c            |   4
-rw-r--r--  net/netfilter/x_tables.c              |   7
-rw-r--r--  net/sched/act_bpf.c                   |  10
-rw-r--r--  net/sched/act_csum.c                  |  10
-rw-r--r--  net/sched/act_gact.c                  |  10
-rw-r--r--  net/sched/act_ife.c                   |   8
-rw-r--r--  net/sched/act_mirred.c                |  16
-rw-r--r--  net/sched/act_sample.c                |  25
-rw-r--r--  net/sched/act_tunnel_key.c            |  10
-rw-r--r--  net/sched/act_vlan.c                  |  10
-rw-r--r--  net/sched/cls_matchall.c              |   2
-rw-r--r--  net/tls/tls_main.c                    |   1
30 files changed, 221 insertions(+), 195 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index fd423ce3da34..c25eb36f1320 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3246,31 +3246,33 @@ static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
}
}
-static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
- unsigned long aux)
+void bpf_clear_redirect_map(struct bpf_map *map)
{
- return (unsigned long)xdp_prog->aux != aux;
+ struct bpf_redirect_info *ri;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ ri = per_cpu_ptr(&bpf_redirect_info, cpu);
+ /* Avoid polluting remote cacheline due to writes if
+ * not needed. Once we pass this test, we need the
+ * cmpxchg() to make sure it hasn't been changed in
+ * the meantime by remote CPU.
+ */
+ if (unlikely(READ_ONCE(ri->map) == map))
+ cmpxchg(&ri->map, map, NULL);
+ }
}
static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+ struct bpf_prog *xdp_prog, struct bpf_map *map)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
- unsigned long map_owner = ri->map_owner;
- struct bpf_map *map = ri->map;
u32 index = ri->ifindex;
void *fwd = NULL;
int err;
ri->ifindex = 0;
- ri->map = NULL;
- ri->map_owner = 0;
-
- if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
- err = -EFAULT;
- map = NULL;
- goto err;
- }
+ WRITE_ONCE(ri->map, NULL);
fwd = __xdp_map_lookup_elem(map, index);
if (!fwd) {
@@ -3296,12 +3298,13 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_map *map = READ_ONCE(ri->map);
struct net_device *fwd;
u32 index = ri->ifindex;
int err;
- if (ri->map)
- return xdp_do_redirect_map(dev, xdp, xdp_prog);
+ if (map)
+ return xdp_do_redirect_map(dev, xdp, xdp_prog, map);
fwd = dev_get_by_index_rcu(dev_net(dev), index);
ri->ifindex = 0;
@@ -3325,24 +3328,17 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect);
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+ struct bpf_prog *xdp_prog,
+ struct bpf_map *map)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
- unsigned long map_owner = ri->map_owner;
- struct bpf_map *map = ri->map;
u32 index = ri->ifindex;
void *fwd = NULL;
int err = 0;
ri->ifindex = 0;
- ri->map = NULL;
- ri->map_owner = 0;
+ WRITE_ONCE(ri->map, NULL);
- if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
- err = -EFAULT;
- map = NULL;
- goto err;
- }
fwd = __xdp_map_lookup_elem(map, index);
if (unlikely(!fwd)) {
err = -EINVAL;
@@ -3379,13 +3375,14 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_map *map = READ_ONCE(ri->map);
u32 index = ri->ifindex;
struct net_device *fwd;
int err = 0;
- if (ri->map)
- return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog);
-
+ if (map)
+ return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
+ map);
ri->ifindex = 0;
fwd = dev_get_by_index_rcu(dev_net(dev), index);
if (unlikely(!fwd)) {
@@ -3416,8 +3413,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
ri->ifindex = ifindex;
ri->flags = flags;
- ri->map = NULL;
- ri->map_owner = 0;
+ WRITE_ONCE(ri->map, NULL);
return XDP_REDIRECT;
}
@@ -3430,8 +3426,8 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
-BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
- unsigned long, map_owner)
+BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
+ u64, flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
@@ -3440,15 +3436,11 @@ BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags
ri->ifindex = ifindex;
ri->flags = flags;
- ri->map = map;
- ri->map_owner = map_owner;
+ WRITE_ONCE(ri->map, map);
return XDP_REDIRECT;
}
-/* Note, arg4 is hidden from users and populated by the verifier
- * with the right pointer.
- */
static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
.func = bpf_xdp_redirect_map,
.gpl_only = false,
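The bpf_clear_redirect_map() helper added above only issues a cmpxchg() on a remote CPU's redirect slot after a cheap READ_ONCE() shows that the slot actually holds the map being removed, so cachelines of CPUs that never redirected through this map are never dirtied. A hedged userspace analog of that check-then-compare-and-swap pattern, with C11 atomics standing in for READ_ONCE()/cmpxchg() (slot[], clear_matching() and NCPUS are illustrative names, not kernel symbols):

/* Check-then-CAS: only issue the cacheline-dirtying atomic write when
 * the slot really points at the object being torn down. */
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static _Atomic(void *) slot[NCPUS];    /* stand-in for per-CPU ri->map */

static void clear_matching(void *map)
{
	for (int cpu = 0; cpu < NCPUS; cpu++) {
		/* Cheap read first: skip the write entirely unless this
		 * slot really holds the map being removed. */
		void *cur = atomic_load_explicit(&slot[cpu],
						 memory_order_relaxed);
		if (cur != map)
			continue;
		/* CAS so a concurrent update between the read and the
		 * write is not clobbered with NULL. */
		atomic_compare_exchange_strong(&slot[cpu], &cur, NULL);
	}
}

int main(void)
{
	int a, b;

	atomic_store(&slot[0], (void *)&a);
	atomic_store(&slot[2], (void *)&b);
	clear_matching(&a);            /* clears slot 0 only */
	printf("slot0=%p slot2=%p\n",
	       atomic_load(&slot[0]), atomic_load(&slot[2]));
	return 0;
}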
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 3dd99e1c04f5..89b6785cef2a 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -98,23 +98,15 @@ static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
struct xdp_mem_allocator *xa;
int id = xdp_rxq->mem.id;
- int err;
if (id == 0)
return;
mutex_lock(&mem_id_lock);
- xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
- if (!xa) {
- mutex_unlock(&mem_id_lock);
- return;
- }
-
- err = rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params);
- WARN_ON(err);
-
- call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
+ xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
+ if (xa && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
+ call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
mutex_unlock(&mem_id_lock);
}
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 3f091ccad9af..f38cb21d773d 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -438,7 +438,8 @@ static int __net_init vti_init_net(struct net *net)
if (err)
return err;
itn = net_generic(net, vti_net_id);
- vti_fb_tunnel_init(itn->fb_tunnel_dev);
+ if (itn->fb_tunnel_dev)
+ vti_fb_tunnel_init(itn->fb_tunnel_dev);
return 0;
}
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 622caa4039e0..a5995bb2eaca 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -51,7 +51,7 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
#ifdef CONFIG_MODULES
if (!ulp && capable(CAP_NET_ADMIN)) {
rcu_read_unlock();
- request_module("%s", name);
+ request_module("tcp-ulp-%s", name);
rcu_read_lock();
ulp = tcp_ulp_find(name);
}
@@ -129,6 +129,8 @@ void tcp_cleanup_ulp(struct sock *sk)
if (icsk->icsk_ulp_ops->release)
icsk->icsk_ulp_ops->release(sk);
module_put(icsk->icsk_ulp_ops->owner);
+
+ icsk->icsk_ulp_ops = NULL;
}
/* Change upper layer protocol for socket */
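The request_module("tcp-ulp-%s", name) change above means a ULP name passed in from setsockopt() can only auto-load modules that declare a matching "tcp-ulp-" alias, rather than any module whose plain name the caller happens to choose. A small hedged userspace analog of that prefix-gating idea (the registered[] table and find_ulp() are illustrative only, not the kernel's module loader):

/* Resolve names only through a fixed prefix, so a caller-supplied
 * string cannot name an arbitrary loadable object. */
#include <stdio.h>
#include <string.h>

static const char *registered[] = { "tcp-ulp-tls", NULL };

static const char *find_ulp(const char *name)
{
	char alias[64];

	/* Mirror request_module("tcp-ulp-%s", name): build the prefixed
	 * alias instead of trusting 'name' verbatim. */
	snprintf(alias, sizeof(alias), "tcp-ulp-%s", name);
	for (int i = 0; registered[i]; i++)
		if (!strcmp(registered[i], alias))
			return registered[i];
	return NULL;
}

int main(void)
{
	printf("tls   -> %s\n", find_ulp("tls") ? "loadable" : "rejected");
	printf("e1000 -> %s\n", find_ulp("e1000") ? "loadable" : "rejected");
	return 0;
}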
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index c72ae3a4fe09..38dec9da90d3 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -503,17 +503,9 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
skb->dev = skb_dst(skb)->dev;
err = dst_output(t->net, skb->sk, skb);
- if (net_xmit_eval(err) == 0) {
- struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
-
- u64_stats_update_begin(&tstats->syncp);
- tstats->tx_bytes += pkt_len;
- tstats->tx_packets++;
- u64_stats_update_end(&tstats->syncp);
- } else {
- stats->tx_errors++;
- stats->tx_aborted_errors++;
- }
+ if (net_xmit_eval(err) == 0)
+ err = pkt_len;
+ iptunnel_xmit_stats(dev, err);
return 0;
tx_err_link_failure:
@@ -1114,6 +1106,8 @@ static int __net_init vti6_init_net(struct net *net)
ip6n->tnls[0] = ip6n->tnls_wc;
ip6n->tnls[1] = ip6n->tnls_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0",
NET_NAME_UNKNOWN, vti6_dev_setup);
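The vti6_xmit() hunk above drops the open-coded per-CPU counters and reuses the convention behind iptunnel_xmit_stats(): a non-negative value counts as bytes transmitted, a negative value counts as a transmit error. A hedged sketch of that sign-encoded accounting (struct counters and record_xmit() are illustrative, not the kernel helpers):

/* One accounting helper: >= 0 counts bytes, < 0 counts an error. */
#include <stdio.h>

struct counters {
	unsigned long tx_packets, tx_bytes, tx_errors;
};

static void record_xmit(struct counters *c, long pkt_len_or_err)
{
	if (pkt_len_or_err >= 0) {
		c->tx_packets++;
		c->tx_bytes += pkt_len_or_err;
	} else {
		c->tx_errors++;
	}
}

int main(void)
{
	struct counters c = { 0 };

	record_xmit(&c, 1500);  /* success: err was replaced by pkt_len */
	record_xmit(&c, -5);    /* net_xmit_eval() reported a failure */
	printf("pkts=%lu bytes=%lu errs=%lu\n",
	       c.tx_packets, c.tx_bytes, c.tx_errors);
	return 0;
}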
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 0fe61ede77c6..c3c6b09acdc4 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -26,6 +26,12 @@ static bool rpfilter_addr_unicast(const struct in6_addr *addr)
return addr_type & IPV6_ADDR_UNICAST;
}
+static bool rpfilter_addr_linklocal(const struct in6_addr *addr)
+{
+ int addr_type = ipv6_addr_type(addr);
+ return addr_type & IPV6_ADDR_LINKLOCAL;
+}
+
static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
const struct net_device *dev, u8 flags)
{
@@ -48,7 +54,11 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
}
fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- if ((flags & XT_RPFILTER_LOOSE) == 0)
+
+ if (rpfilter_addr_linklocal(&iph->saddr)) {
+ lookup_flags |= RT6_LOOKUP_F_IFACE;
+ fl6.flowi6_oif = dev->ifindex;
+ } else if ((flags & XT_RPFILTER_LOOSE) == 0)
fl6.flowi6_oif = dev->ifindex;
rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
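The rpfilter change above forces a strict, interface-bound route lookup whenever the source address is link-local, because fe80::/10 addresses are only meaningful together with the ingress interface. A small runnable check of that same classification using the standard IN6_IS_ADDR_LINKLOCAL() macro (the sample addresses are arbitrary):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>

/* Link-local (fe80::/10) sources need the lookup pinned to the
 * ingress interface; global sources do not. */
static void classify(const char *txt)
{
	struct in6_addr a;

	if (inet_pton(AF_INET6, txt, &a) != 1) {
		printf("%s: not an IPv6 address\n", txt);
		return;
	}
	printf("%-24s %s\n", txt,
	       IN6_IS_ADDR_LINKLOCAL(&a) ?
			"link-local: bind lookup to ifindex" :
			"global: normal rpfilter lookup");
}

int main(void)
{
	classify("fe80::1");
	classify("2001:db8::1");
	return 0;
}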
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 0edc62910ebf..5b2b17867cb1 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1117,24 +1117,28 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
- "%s %04X %-11s %7lu%s\n",
+ "%s %04X %-11s %7u%s\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp),
- (cp->timer.expires-jiffies)/HZ, pe_data);
+ jiffies_delta_to_msecs(cp->timer.expires -
+ jiffies) / 1000,
+ pe_data);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X"
- " %s %04X %-11s %7lu%s\n",
+ " %s %04X %-11s %7u%s\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp),
- (cp->timer.expires-jiffies)/HZ, pe_data);
+ jiffies_delta_to_msecs(cp->timer.expires -
+ jiffies) / 1000,
+ pe_data);
}
return 0;
}
@@ -1179,26 +1183,28 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
- "%s %04X %-11s %-6s %7lu\n",
+ "%s %04X %-11s %-6s %7u\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp),
ip_vs_origin_name(cp->flags),
- (cp->timer.expires-jiffies)/HZ);
+ jiffies_delta_to_msecs(cp->timer.expires -
+ jiffies) / 1000);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X "
- "%s %04X %-11s %-6s %7lu\n",
+ "%s %04X %-11s %-6s %7u\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp),
ip_vs_origin_name(cp->flags),
- (cp->timer.expires-jiffies)/HZ);
+ jiffies_delta_to_msecs(cp->timer.expires -
+ jiffies) / 1000);
}
return 0;
}
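Both /proc seq handlers above switch from (cp->timer.expires - jiffies) / HZ to jiffies_delta_to_msecs(...) / 1000, so an already-expired timer prints 0 instead of a wrapped unsigned value. A hedged userspace sketch of that clamp-then-convert step (delta_to_secs() and the HZ value are illustrative):

/* Clamp a possibly-negative tick delta to zero before converting,
 * instead of letting the unsigned arithmetic wrap to a huge count. */
#include <stdio.h>

#define HZ 250                          /* illustrative tick rate */

static unsigned int delta_to_secs(long delta_ticks)
{
	if (delta_ticks < 0)            /* timer already expired */
		delta_ticks = 0;
	return (unsigned int)(delta_ticks / HZ);
}

int main(void)
{
	printf("expires in 750 ticks  -> %u s\n", delta_to_secs(750));
	/* the old unsigned subtraction would wrap to a huge count here */
	printf("expired 250 ticks ago -> %u s\n", delta_to_secs(-250));
	return 0;
}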
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 0679dd101e72..7ca926a03b81 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1972,13 +1972,20 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
- if (sysctl_expire_nodest_conn(ipvs)) {
+ __u32 flags = cp->flags;
+
+ /* When the timer is already started, silently drop the packet. */
+ if (timer_pending(&cp->timer))
+ __ip_vs_conn_put(cp);
+ else
+ ip_vs_conn_put(cp);
+
+ if (sysctl_expire_nodest_conn(ipvs) &&
+ !(flags & IP_VS_CONN_F_ONE_PACKET)) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
}
- /* don't restart its timer, and silently
- drop the packet. */
- __ip_vs_conn_put(cp);
+
return NF_DROP;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index f981bfa8db72..036207ecaf16 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -846,6 +846,21 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[])
#endif
}
+static int ctnetlink_start(struct netlink_callback *cb)
+{
+ const struct nlattr * const *cda = cb->data;
+ struct ctnetlink_filter *filter = NULL;
+
+ if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
+ filter = ctnetlink_alloc_filter(cda);
+ if (IS_ERR(filter))
+ return PTR_ERR(filter);
+ }
+
+ cb->data = filter;
+ return 0;
+}
+
static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
{
struct ctnetlink_filter *filter = data;
@@ -1290,19 +1305,12 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start = ctnetlink_start,
.dump = ctnetlink_dump_table,
.done = ctnetlink_done,
+ .data = (void *)cda,
};
- if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
- struct ctnetlink_filter *filter;
-
- filter = ctnetlink_alloc_filter(cda);
- if (IS_ERR(filter))
- return PTR_ERR(filter);
-
- c.data = filter;
- }
return netlink_dump_start(ctnl, skb, nlh, &c);
}
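The ctnetlink hunk above (and the matching nfnetlink_acct change further down) moves filter allocation from the request handler into the dump's .start callback, so a parse or allocation failure is returned before the dump is registered and cb->data is either a valid filter or NULL. A hedged userspace sketch of that start/dump/done lifecycle (struct dump_ctl and run_dump() are illustrative, not the netlink API):

/* Allocate per-dump state in .start so failures abort cleanly and
 * .done always sees either NULL or a fully built filter. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct dump_ctl {
	int  (*start)(void **state, const char *arg);
	void (*dump)(void *state);
	void (*done)(void *state);
};

static int filter_start(void **state, const char *arg)
{
	char *filter = NULL;

	if (arg) {                       /* optional filter attribute */
		filter = strdup(arg);
		if (!filter)
			return -1;       /* reported before dumping starts */
	}
	*state = filter;
	return 0;
}

static void filter_dump(void *state)
{
	printf("dumping with filter: %s\n", state ? (char *)state : "(none)");
}

static void filter_done(void *state)
{
	free(state);
}

static int run_dump(const struct dump_ctl *c, const char *arg)
{
	void *state = NULL;
	int err = c->start ? c->start(&state, arg) : 0;

	if (err)
		return err;
	c->dump(state);
	c->done(state);
	return 0;
}

int main(void)
{
	struct dump_ctl c = { filter_start, filter_dump, filter_done };

	run_dump(&c, "mark=0x1");
	run_dump(&c, NULL);
	return 0;
}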
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 30070732ee50..9f14b0df6960 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -312,7 +312,9 @@ void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
__nf_ct_l4proto_unregister_one(l4proto);
mutex_unlock(&nf_ct_proto_mutex);
- synchronize_rcu();
+ synchronize_net();
+ /* Remove all conntrack entries for this protocol */
+ nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
@@ -333,14 +335,17 @@ static void
nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
unsigned int num_proto)
{
+ int i;
+
mutex_lock(&nf_ct_proto_mutex);
- while (num_proto-- != 0)
- __nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+ for (i = 0; i < num_proto; i++)
+ __nf_ct_l4proto_unregister_one(l4proto[i]);
mutex_unlock(&nf_ct_proto_mutex);
synchronize_net();
- /* Remove all contrack entries for this protocol */
- nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
+
+ for (i = 0; i < num_proto; i++)
+ nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]);
}
static int
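The batch-unregister fix above stops passing the l4proto[] array pointer itself to kill_l4proto() and instead hands each element to nf_ct_iterate_destroy() in its own pass. A tiny runnable illustration of why the old cast could never match anything (match_one() and the types are illustrative):

/* Passing the array itself instead of each element compares against
 * the wrong pointer, so no entry is ever recognised. */
#include <stdio.h>

struct proto { int id; };

static int match_one(const struct proto *candidate, const void *data)
{
	return candidate == (const struct proto *)data;
}

int main(void)
{
	struct proto tcp = { 6 }, udp = { 17 };
	const struct proto *protos[] = { &tcp, &udp };
	const struct proto *entry = &udp;

	/* old style: data = the array itself -- never equal to an entry */
	printf("array as data:   %d\n", match_one(entry, protos));
	/* fixed style: one call per element, data = that element */
	printf("element as data: %d\n", match_one(entry, protos[1]));
	return 0;
}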
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 67cdd5c4f4f5..1dca5683f59f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3354,7 +3354,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
struct nft_set *set;
struct nft_ctx ctx;
char *name;
- unsigned int size;
+ u64 size;
u64 timeout;
u32 ktype, dtype, flags, policy, gc_int, objtype;
struct nft_set_desc desc;
@@ -5925,10 +5925,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
if (event != NETDEV_UNREGISTER)
return 0;
- net = maybe_get_net(dev_net(dev));
- if (!net)
- return 0;
-
+ net = dev_net(dev);
mutex_lock(&net->nft.commit_mutex);
list_for_each_entry(table, &net->nft.tables, list) {
list_for_each_entry(flowtable, &table->flowtables, list) {
@@ -5936,7 +5933,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
}
}
mutex_unlock(&net->nft.commit_mutex);
- put_net(net);
+
return NOTIFY_DONE;
}
@@ -7273,21 +7270,36 @@ static int __init nf_tables_module_init(void)
{
int err;
- nft_chain_filter_init();
+ err = register_pernet_subsys(&nf_tables_net_ops);
+ if (err < 0)
+ return err;
+
+ err = nft_chain_filter_init();
+ if (err < 0)
+ goto err1;
err = nf_tables_core_module_init();
if (err < 0)
- return err;
+ goto err2;
- err = nfnetlink_subsys_register(&nf_tables_subsys);
+ err = register_netdevice_notifier(&nf_tables_flowtable_notifier);
if (err < 0)
- goto err;
+ goto err3;
- register_netdevice_notifier(&nf_tables_flowtable_notifier);
+ /* must be last */
+ err = nfnetlink_subsys_register(&nf_tables_subsys);
+ if (err < 0)
+ goto err4;
- return register_pernet_subsys(&nf_tables_net_ops);
-err:
+ return err;
+err4:
+ unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
+err3:
nf_tables_core_module_exit();
+err2:
+ nft_chain_filter_fini();
+err1:
+ unregister_pernet_subsys(&nf_tables_net_ops);
return err;
}
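The reworked nf_tables_module_init() above registers each facility in dependency order and unwinds in exact reverse order on failure, with the nfnetlink subsystem registered last because that is the step that makes everything reachable from userspace. A hedged standalone sketch of that goto-unwind idiom (the step_*() and undo_*() functions are illustrative):

/* Register in order; on failure undo only what already succeeded, in
 * reverse order. */
#include <stdio.h>

static int  step_a(void) { puts("A up"); return 0; }
static void undo_a(void) { puts("A down"); }
static int  step_b(void) { puts("B up"); return 0; }
static void undo_b(void) { puts("B down"); }
static int  step_c(void) { puts("C up (fails)"); return -1; }

static int init_all(void)
{
	int err;

	err = step_a();
	if (err)
		return err;
	err = step_b();
	if (err)
		goto err_a;
	err = step_c();                 /* last step: makes the rest visible */
	if (err)
		goto err_b;
	return 0;

err_b:
	undo_b();
err_a:
	undo_a();
	return err;
}

int main(void)
{
	printf("init_all() = %d\n", init_all());
	return 0;
}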
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index a0e5adf0b3b6..8fa8bf7c48e6 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -238,29 +238,33 @@ static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = {
[NFACCT_FILTER_VALUE] = { .type = NLA_U32 },
};
-static struct nfacct_filter *
-nfacct_filter_alloc(const struct nlattr * const attr)
+static int nfnl_acct_start(struct netlink_callback *cb)
{
- struct nfacct_filter *filter;
+ const struct nlattr *const attr = cb->data;
struct nlattr *tb[NFACCT_FILTER_MAX + 1];
+ struct nfacct_filter *filter;
int err;
+ if (!attr)
+ return 0;
+
err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy,
NULL);
if (err < 0)
- return ERR_PTR(err);
+ return err;
if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE])
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
if (!filter)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK]));
filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE]));
+ cb->data = filter;
- return filter;
+ return 0;
}
static int nfnl_acct_get(struct net *net, struct sock *nfnl,
@@ -275,18 +279,11 @@ static int nfnl_acct_get(struct net *net, struct sock *nfnl,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nfnl_acct_dump,
+ .start = nfnl_acct_start,
.done = nfnl_acct_done,
+ .data = (void *)tb[NFACCT_FILTER],
};
- if (tb[NFACCT_FILTER]) {
- struct nfacct_filter *filter;
-
- filter = nfacct_filter_alloc(tb[NFACCT_FILTER]);
- if (IS_ERR(filter))
- return PTR_ERR(filter);
-
- c.data = filter;
- }
return netlink_dump_start(nfnl, skb, nlh, &c);
}
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index ea5b7c4944f6..3fd540b2c6ba 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -293,6 +293,13 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
if (strcmp(basechain->dev_name, dev->name) != 0)
return;
+ /* UNREGISTER events also happen on netns exit.
+ *
+ * Although nf_tables core releases all tables/chains, only
+ * this event handler guarantees that basechain.ops->dev is
+ * still accessible, so we cannot skip exiting net
+ * namespaces.
+ */
__nft_release_basechain(ctx);
break;
case NETDEV_CHANGENAME:
@@ -318,10 +325,6 @@ static int nf_tables_netdev_event(struct notifier_block *this,
event != NETDEV_CHANGENAME)
return NOTIFY_DONE;
- ctx.net = maybe_get_net(ctx.net);
- if (!ctx.net)
- return NOTIFY_DONE;
-
mutex_lock(&ctx.net->nft.commit_mutex);
list_for_each_entry(table, &ctx.net->nft.tables, list) {
if (table->family != NFPROTO_NETDEV)
@@ -338,7 +341,6 @@ static int nf_tables_netdev_event(struct notifier_block *this,
}
}
mutex_unlock(&ctx.net->nft.commit_mutex);
- put_net(ctx.net);
return NOTIFY_DONE;
}
@@ -392,7 +394,7 @@ int __init nft_chain_filter_init(void)
return 0;
}
-void __exit nft_chain_filter_fini(void)
+void nft_chain_filter_fini(void)
{
nft_chain_filter_bridge_fini();
nft_chain_filter_inet_fini();
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 4855d4ce1c8f..26a8baebd072 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -832,12 +832,13 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
__u8 l4num;
int ret;
- if (!tb[NFTA_CT_TIMEOUT_L3PROTO] ||
- !tb[NFTA_CT_TIMEOUT_L4PROTO] ||
+ if (!tb[NFTA_CT_TIMEOUT_L4PROTO] ||
!tb[NFTA_CT_TIMEOUT_DATA])
return -EINVAL;
- l3num = ntohs(nla_get_be16(tb[NFTA_CT_TIMEOUT_L3PROTO]));
+ if (tb[NFTA_CT_TIMEOUT_L3PROTO])
+ l3num = ntohs(nla_get_be16(tb[NFTA_CT_TIMEOUT_L3PROTO]));
+
l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
priv->l4proto = l4num;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 81184c244d1a..6e91a37d57f2 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -187,8 +187,6 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (tb[NFTA_DYNSET_EXPR] != NULL) {
if (!(set->flags & NFT_SET_EVAL))
return -EINVAL;
- if (!nft_set_is_anonymous(set))
- return -EOPNOTSUPP;
priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]);
if (IS_ERR(priv->expr))
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 128bc16f52dd..f866bd41e5d2 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -248,13 +248,13 @@ static inline u32 nft_bitmap_size(u32 klen)
return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1;
}
-static inline u32 nft_bitmap_total_size(u32 klen)
+static inline u64 nft_bitmap_total_size(u32 klen)
{
return sizeof(struct nft_bitmap) + nft_bitmap_size(klen);
}
-static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[],
- const struct nft_set_desc *desc)
+static u64 nft_bitmap_privsize(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
{
u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 90c3e7e6cacb..015124e649cb 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -341,8 +341,8 @@ schedule:
nft_set_gc_interval(set));
}
-static unsigned int nft_rhash_privsize(const struct nlattr * const nla[],
- const struct nft_set_desc *desc)
+static u64 nft_rhash_privsize(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
{
return sizeof(struct nft_rhash);
}
@@ -585,8 +585,8 @@ cont:
}
}
-static unsigned int nft_hash_privsize(const struct nlattr * const nla[],
- const struct nft_set_desc *desc)
+static u64 nft_hash_privsize(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
{
return sizeof(struct nft_hash) +
nft_hash_buckets(desc->size) * sizeof(struct hlist_head);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 9873d734b494..55e2d9215c0d 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -411,8 +411,8 @@ static void nft_rbtree_gc(struct work_struct *work)
nft_set_gc_interval(set));
}
-static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[],
- const struct nft_set_desc *desc)
+static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
{
return sizeof(struct nft_rbtree);
}
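The privsize callbacks in nf_tables_api.c and the three set backends above move from unsigned int to u64, so a large userspace-supplied element count cannot overflow the allocation-size arithmetic. A short runnable demonstration of the truncation the wider type avoids (the element size and count are arbitrary):

/* 32-bit size arithmetic silently wraps; doing it in 64 bits does not. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t nelems    = 600000000;   /* userspace-chosen set size */
	uint32_t elem_size = 8;           /* e.g. sizeof(struct hlist_head) */

	uint32_t sz32 = nelems * elem_size;             /* wraps */
	uint64_t sz64 = (uint64_t)nelems * elem_size;   /* exact */

	printf("u32 result: %u bytes\n", sz32);
	printf("u64 result: %llu bytes\n", (unsigned long long)sz64);
	return 0;
}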
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index eff99dffc842..f92a82c73880 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -82,13 +82,15 @@ static void nft_tproxy_eval_v6(const struc