From 72d4d3e3980702809509586d36015b7c3c51fad4 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 21 Apr 2018 13:43:48 +0200 Subject: netfilter: Fix handling simultaneous open in TCP conntrack Dominique Martinet reported a TCP hang problem when simultaneous open was used. The problem is that the tcp_conntracks state table is not smart enough to handle the case. The state table could be fixed by introducing a new state, but that would require more lines of code compared to this patch, due to the required backward compatibility with ctnetlink. Signed-off-by: Jozsef Kadlecsik Reported-by: Dominique Martinet Tested-by: Dominique Martinet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index e97cdc1cf98c..8e67910185a0 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -981,6 +981,17 @@ static int tcp_packet(struct nf_conn *ct, return NF_ACCEPT; /* Don't change state */ } break; + case TCP_CONNTRACK_SYN_SENT2: + /* tcp_conntracks table is not smart enough to handle + * simultaneous open. + */ + ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN; + break; + case TCP_CONNTRACK_SYN_RECV: + if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET && + ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN) + new_state = TCP_CONNTRACK_ESTABLISHED; + break; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) -- cgit v1.2.3 From dceb48d86b4871984b8ce9ad5057fb2c01aa33de Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 25 Apr 2018 13:38:47 +0200 Subject: netfilter: x_tables: check name length in find_match/target, too ebtables uses find_match() rather than find_request_match in one case (see bcf4934288402be3464110109a4dae3bd6fb3e93, "netfilter: ebtables: Fix extension lookup with identical name"), so extend the check on name length to those functions too. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 71325fef647d..cb7cb300c3bc 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -183,6 +183,9 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) struct xt_match *m; int err = -ENOENT; + if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) + return ERR_PTR(-EINVAL); + mutex_lock(&xt[af].mutex); list_for_each_entry(m, &xt[af].match, list) { if (strcmp(m->name, name) == 0) { @@ -229,6 +232,9 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) struct xt_target *t; int err = -ENOENT; + if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) + return ERR_PTR(-EINVAL); + mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt[af].target, list) { if (strcmp(t->name, name) == 0) { -- cgit v1.2.3 From 2f99aa31cd7a5d667f17dd2924c884c3d2c621ac Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 25 Apr 2018 15:11:07 +0200 Subject: netfilter: nf_tables: skip synchronize_rcu if transaction log is empty After processing the transaction log, the remaining entries of the log need to be released. However, in some cases no entries remain, e.g. because the transaction did not remove anything. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 04d4e3772584..785d7fcf1fe1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5761,7 +5761,7 @@ static void nft_chain_commit_update(struct nft_trans *trans) } } -static void nf_tables_commit_release(struct nft_trans *trans) +static void nft_commit_release(struct nft_trans *trans) { switch (trans->msg_type) { case NFT_MSG_DELTABLE: @@ -5790,6 +5790,21 @@ static void nf_tables_commit_release(struct nft_trans *trans) kfree(trans); } +static void nf_tables_commit_release(struct net *net) +{ + struct nft_trans *trans, *next; + + if (list_empty(&net->nft.commit_list)) + return; + + synchronize_rcu(); + + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_del(&trans->list); + nft_commit_release(trans); + } +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nft_trans *trans, *next; @@ -5920,13 +5935,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } - synchronize_rcu(); - - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { - list_del(&trans->list); - nf_tables_commit_release(trans); - } - + nf_tables_commit_release(net); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); return 0; -- cgit v1.2.3 From a4995684a949cc1d28fbf09900c47c34b9427ecf Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 27 Apr 2018 11:16:09 -0700 Subject: netfilter: bridge: stp fix reference to uninitialized data The destination mac (destmac) is only valid if EBT_DESTMAC flag is set. Fix by changing the order of the comparison to look for the flag first. Reported-by: syzbot+5c06e318fc558cc27823@syzkaller.appspotmail.com Signed-off-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_stp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 47ba98db145d..46c1fe7637ea 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -161,8 +161,8 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par) /* Make sure the match only receives stp frames */ if (!par->nft_compat && (!ether_addr_equal(e->destmac, eth_stp_addr) || - !is_broadcast_ether_addr(e->destmsk) || - !(e->bitmask & EBT_DESTMAC))) + !(e->bitmask & EBT_DESTMAC) || + !is_broadcast_ether_addr(e->destmsk))) return -EINVAL; return 0; -- cgit v1.2.3 From b8e9dc1c75714ceb53615743e1036f76e00f5a17 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 2 May 2018 14:07:42 +0200 Subject: netfilter: nf_tables: nft_compat: fix refcount leak on xt module Taehee Yoo reported following bug: iptables-compat -I OUTPUT -m cpu --cpu 0 iptables-compat -F lsmod |grep xt_cpu xt_cpu 16384 1 Quote: "When above command is given, a netlink message has two expressions that are the cpu compat and the nft_counter. The nft_expr_type_get() in the nf_tables_expr_parse() successes first expression then, calls select_ops callback. (allocates memory and holds module) But, second nft_expr_type_get() in the nf_tables_expr_parse() returns -EAGAIN because of request_module(). In that point, by the 'goto err1', the 'module_put(info[i].ops->type->owner)' is called. There is no release routine." The core problem is that unlike all other expression, nft_compat select_ops has side effects. 1. it allocates dynamic memory which holds an nft ops struct. In all other expressions, ops has static storage duration. 2. It grabs references to the xt module that it is supposed to invoke. Depending on where things go wrong, error unwinding doesn't always do the right thing. In the above scenario, a new nft_compat_expr is created and xt_cpu module gets loaded with a refcount of 1. Due to to -EAGAIN, the netlink messages get re-parsed. When that happens, nft_compat finds that xt_cpu is already present and increments module refcount again. This fixes the problem by making select_ops to have no visible side effects and removes all extra module_get/put. When select_ops creates a new nft_compat expression, the new expression has a refcount of 0, and the xt module gets its refcount incremented. When error happens, the next call finds existing entry, but will no longer increase the reference count -- the presence of existing nft_xt means we already hold a module reference. Because nft_xt_put is only called from nft_compat destroy hook, it will never see the initial zero reference count. ->destroy can only be called after ->init(), and that will increase the refcount. Lastly, we now free nft_xt struct with kfree_rcu. Else, we get use-after free in nf_tables_rule_destroy: while (expr != nft_expr_last(rule) && expr->ops) { nf_tables_expr_destroy(ctx, expr); expr = nft_expr_next(expr); // here nft_expr_next() dereferences expr->ops. This is safe for all users, as ops have static storage duration. In nft_compat case however, its ->destroy callback can free the memory that hold the ops structure. Tested-by: Taehee Yoo Reported-by: Taehee Yoo Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 92 +++++++++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 8e23726b9081..870d8c29dae9 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -27,14 +27,24 @@ struct nft_xt { struct list_head head; struct nft_expr_ops ops; unsigned int refcnt; + + /* Unlike other expressions, ops doesn't have static storage duration. + * nft core assumes they do. We use kfree_rcu so that nft core can + * can check expr->ops->size even after nft_compat->destroy() frees + * the nft_xt struct that holds the ops structure. + */ + struct rcu_head rcu_head; }; -static void nft_xt_put(struct nft_xt *xt) +static bool nft_xt_put(struct nft_xt *xt) { if (--xt->refcnt == 0) { list_del(&xt->head); - kfree(xt); + kfree_rcu(xt, rcu_head); + return true; } + + return false; } static int nft_compat_chain_validate_dependency(const char *tablename, @@ -226,6 +236,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_target *target = expr->ops->data; struct xt_tgchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); + struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -236,25 +247,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (ctx->nla[NFTA_RULE_COMPAT]) { ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); if (ret < 0) - goto err; + return ret; } nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); ret = xt_check_target(&par, size, proto, inv); if (ret < 0) - goto err; + return ret; /* The standard target cannot be used */ - if (target->target == NULL) { - ret = -EINVAL; - goto err; - } + if (!target->target) + return -EINVAL; + nft_xt = container_of(expr->ops, struct nft_xt, ops); + nft_xt->refcnt++; return 0; -err: - module_put(target->me); - return ret; } static void @@ -271,8 +279,8 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.target->destroy != NULL) par.target->destroy(&par); - nft_xt_put(container_of(expr->ops, struct nft_xt, ops)); - module_put(target->me); + if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) + module_put(target->me); } static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -411,6 +419,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_match *match = expr->ops->data; struct xt_mtchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); + struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -421,19 +430,18 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (ctx->nla[NFTA_RULE_COMPAT]) { ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); if (ret < 0) - goto err; + return ret; } nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); ret = xt_check_match(&par, size, proto, inv); if (ret < 0) - goto err; + return ret; + nft_xt = container_of(expr->ops, struct nft_xt, ops); + nft_xt->refcnt++; return 0; -err: - module_put(match->me); - return ret; } static void @@ -450,8 +458,8 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.match->destroy != NULL) par.match->destroy(&par); - nft_xt_put(container_of(expr->ops, struct nft_xt, ops)); - module_put(match->me); + if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) + module_put(match->me); } static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -654,13 +662,8 @@ nft_match_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_match, &nft_match_list, head) { struct xt_match *match = nft_match->ops.data; - if (nft_match_cmp(match, mt_name, rev, family)) { - if (!try_module_get(match->me)) - return ERR_PTR(-ENOENT); - - nft_match->refcnt++; + if (nft_match_cmp(match, mt_name, rev, family)) return &nft_match->ops; - } } match = xt_request_find_match(family, mt_name, rev); @@ -679,7 +682,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, goto err; } - nft_match->refcnt = 1; + nft_match->refcnt = 0; nft_match->ops.type = &nft_match_type; nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); nft_match->ops.eval = nft_match_eval; @@ -739,13 +742,8 @@ nft_target_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; - if (nft_target_cmp(target, tg_name, rev, family)) { - if (!try_module_get(target->me)) - return ERR_PTR(-ENOENT); - - nft_target->refcnt++; + if (nft_target_cmp(target, tg_name, rev, family)) return &nft_target->ops; - } } target = xt_request_find_target(family, tg_name, rev); @@ -764,7 +762,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, goto err; } - nft_target->refcnt = 1; + nft_target->refcnt = 0; nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; @@ -823,6 +821,32 @@ err_match: static void __exit nft_compat_module_exit(void) { + struct nft_xt *xt, *next; + + /* list should be empty here, it can be non-empty only in case there + * was an error that caused nft_xt expr to not be initialized fully + * and noone else requested the same expression later. + * + * In this case, the lists contain 0-refcount entries that still + * hold module reference. + */ + list_for_each_entry_safe(xt, next, &nft_target_list, head) { + struct xt_target *target = xt->ops.data; + + if (WARN_ON_ONCE(xt->refcnt)) + continue; + module_put(target->me); + kfree(xt); + } + + list_for_each_entry_safe(xt, next, &nft_match_list, head) { + struct xt_match *match = xt->ops.data; + + if (WARN_ON_ONCE(xt->refcnt)) + continue; + module_put(match->me); + kfree(xt); + } nfnetlink_subsys_unregister(&nfnl_compat_subsys); nft_unregister_expr(&nft_target_type); nft_unregister_expr(&nft_match_type); -- cgit v1.2.3 From a050d345cef0dc6249263540da1e902bba617e43 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 3 May 2018 22:01:40 +0300 Subject: ipvs: fix refcount usage for conns in ops mode Connections in One-packet scheduling mode (-o, --ops) are removed with refcnt=0 because they are not hashed in conn table. To avoid refcount_dec reporting this as error, change them to be removed with refcount_dec_if_one as all other connections. refcount_t hit zero at ip_vs_conn_put+0x31/0x40 [ip_vs] in sh[15519], uid/euid: 497/497 WARNING: CPU: 0 PID: 15519 at ../kernel/panic.c:657 refcount_error_report+0x94/0x9e Modules linked in: ip_vs_rr cirrus ttm sb_edac edac_core drm_kms_helper crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc mousedev drm aesni_intel aes_x86_64 crypto_simd glue_helper cryptd psmouse evdev input_leds led_class intel_agp fb_sys_fops syscopyarea sysfillrect intel_rapl_perf mac_hid intel_gtt serio_raw sysimgblt agpgart i2c_piix4 i2c_core ata_generic pata_acpi floppy cfg80211 rfkill button loop macvlan ip_vs nf_conntrack libcrc32c crc32c_generic ip_tables x_tables ipv6 crc_ccitt autofs4 ext4 crc16 mbcache jbd2 fscrypto ata_piix libata atkbd libps2 scsi_mod crc32c_intel i8042 rtc_cmos serio af_packet dm_mod dax fuse xen_netfront xen_blkfront CPU: 0 PID: 15519 Comm: sh Tainted: G W 4.15.17 #1-NixOS Hardware name: Xen HVM domU, BIOS 4.2.amazon 08/24/2006 RIP: 0010:refcount_error_report+0x94/0x9e RSP: 0000:ffffa344dde039c8 EFLAGS: 00010296 RAX: 0000000000000057 RBX: ffffffff92f20e06 RCX: 0000000000000006 RDX: 0000000000000007 RSI: 0000000000000086 RDI: ffffa344dde165c0 RBP: ffffa344dde03b08 R08: 0000000000000218 R09: 0000000000000004 R10: ffffffff93006a80 R11: 0000000000000001 R12: ffffa344d68cd100 R13: 00000000000001f1 R14: ffffffff92f12fb0 R15: 0000000000000004 FS: 00007fc9d2040fc0(0000) GS:ffffa344dde00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000262a000 CR3: 0000000016a0c004 CR4: 00000000001606f0 Call Trace: ex_handler_refcount+0x4e/0x80 fixup_exception+0x33/0x40 do_trap+0x83/0x140 do_error_trap+0x83/0xf0 ? ip_vs_conn_drop_conntrack+0x120/0x1a5 [ip_vs] ? ip_finish_output2+0x29c/0x390 ? ip_finish_output2+0x1a2/0x390 invalid_op+0x1b/0x40 RIP: 0010:ip_vs_conn_put+0x31/0x40 [ip_vs] RSP: 0000:ffffa344dde03bb8 EFLAGS: 00010246 RAX: 0000000000000001 RBX: ffffa344df31cf00 RCX: ffffa344d7450198 RDX: 0000000000000003 RSI: 00000000fffffe01 RDI: ffffa344d7450140 RBP: 0000000000000002 R08: 0000000000000476 R09: 0000000000000000 R10: ffffa344dde03b28 R11: ffffa344df200000 R12: ffffa344d7d09000 R13: ffffa344def3a980 R14: ffffffffc04f6e20 R15: 0000000000000008 ip_vs_in.part.29.constprop.36+0x34f/0x640 [ip_vs] ? ip_vs_conn_out_get+0xe0/0xe0 [ip_vs] ip_vs_remote_request4+0x47/0xa0 [ip_vs] ? ip_vs_in.part.29.constprop.36+0x640/0x640 [ip_vs] nf_hook_slow+0x43/0xc0 ip_local_deliver+0xac/0xc0 ? ip_rcv_finish+0x400/0x400 ip_rcv+0x26c/0x380 __netif_receive_skb_core+0x3a0/0xb10 ? inet_gro_receive+0x23c/0x2b0 ? netif_receive_skb_internal+0x24/0xb0 netif_receive_skb_internal+0x24/0xb0 napi_gro_receive+0xb8/0xe0 xennet_poll+0x676/0xb40 [xen_netfront] net_rx_action+0x139/0x3a0 __do_softirq+0xde/0x2b4 irq_exit+0xae/0xb0 xen_evtchn_do_upcall+0x2c/0x40 xen_hvm_callback_vector+0x7d/0x90 RIP: 0033:0x7fc9d11c91f9 RSP: 002b:00007ffebe8a2ea0 EFLAGS: 00000202 ORIG_RAX: ffffffffffffff0c RAX: 00000000ffffffff RBX: 0000000002609808 RCX: 0000000000000054 RDX: 0000000000000001 RSI: 0000000002605440 RDI: 00000000025f940e RBP: 00000000025f940e R08: 000000000260213d R09: 1999999999999999 R10: 000000000262a808 R11: 00000000025f942d R12: 00000000025f940e R13: 00007fc9d1301e20 R14: 00000000025f9408 R15: 00007fc9d1302720 Code: 48 8b 95 80 00 00 00 41 55 49 8d 8c 24 e0 05 00 00 45 8b 84 24 38 04 00 00 41 89 c1 48 89 de 48 c7 c7 a8 2f f2 92 e8 7c fa ff ff <0f> 0b 58 5b 5d 41 5c 41 5d c3 0f 1f 44 00 00 55 48 89 e5 41 56 Reported-by: Net Filter Fixes: b54ab92b84b6 ("netfilter: refcounter conversions") Signed-off-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 370abbf6f421..75de46576f51 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -232,7 +232,10 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) { unsigned int hash; - bool ret; + bool ret = false; + + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + return refcount_dec_if_one(&cp->refcnt); hash = ip_vs_conn_hashkey_conn(cp); @@ -240,15 +243,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { - ret = false; /* Decrease refcnt and unlink conn only if we are last user */ if (refcount_dec_if_one(&cp->refcnt)) { hlist_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; ret = true; } - } else - ret = refcount_read(&cp->refcnt) ? false : true; + } spin_unlock(&cp->lock); ct_write_unlock_bh(hash); @@ -454,12 +455,6 @@ ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af, } EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); -static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp) -{ - __ip_vs_conn_put(cp); - ip_vs_conn_expire(&cp->timer); -} - /* * Put back the conn and restart its timer with its timeout */ @@ -478,7 +473,7 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) (refcount_read(&cp->refcnt) == 1) && !timer_pending(&cp->timer)) /* expire connection immediately */ - __ip_vs_conn_put_notimer(cp); + ip_vs_conn_expire(&cp->timer); else __ip_vs_conn_put_timer(cp); } -- cgit v1.2.3 From d5e032fc5697b6c0d6b4958bcacb981a08f8174e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 3 May 2018 22:02:18 +0300 Subject: ipvs: fix stats update from local clients Local clients are not properly synchronized on 32-bit CPUs when updating stats (3.10+). Now it is possible estimation_timer (timer), a stats reader, to interrupt the local client in the middle of write_seqcount_{begin,end} sequence leading to loop (DEADLOCK). The same interrupt can happen from received packet (SoftIRQ) which updates the same per-CPU stats. Fix it by disabling BH while updating stats. Found with debug: WARNING: inconsistent lock state 4.17.0-rc2-00105-g35cb6d7-dirty #2 Not tainted -------------------------------- inconsistent {IN-SOFTIRQ-R} -> {SOFTIRQ-ON-W} usage. ftp/2545 [HC0[0]:SC0[0]:HE1:SE1] takes: 86845479 (&syncp->seq#6){+.+-}, at: ip_vs_schedule+0x1c5/0x59e [ip_vs] {IN-SOFTIRQ-R} state was registered at: lock_acquire+0x44/0x5b estimation_timer+0x1b3/0x341 [ip_vs] call_timer_fn+0x54/0xcd run_timer_softirq+0x10c/0x12b __do_softirq+0xc1/0x1a9 do_softirq_own_stack+0x1d/0x23 irq_exit+0x4a/0x64 smp_apic_timer_interrupt+0x63/0x71 apic_timer_interrupt+0x3a/0x40 default_idle+0xa/0xc arch_cpu_idle+0x9/0xb default_idle_call+0x21/0x23 do_idle+0xa0/0x167 cpu_startup_entry+0x19/0x1b start_secondary+0x133/0x182 startup_32_smp+0x164/0x168 irq event stamp: 42213 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&syncp->seq#6); lock(&syncp->seq#6); *** DEADLOCK *** Fixes: ac69269a45e8 ("ipvs: do not disable bh for long time") Signed-off-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5f6f73cf2174..0679dd101e72 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -119,6 +119,8 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_cpu_stats *s; struct ip_vs_service *svc; + local_bh_disable(); + s = this_cpu_ptr(dest->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.inpkts++; @@ -137,6 +139,8 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->cnt.inpkts++; s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); + + local_bh_enable(); } } @@ -151,6 +155,8 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_cpu_stats *s; struct ip_vs_service *svc; + local_bh_disable(); + s = this_cpu_ptr(dest->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.outpkts++; @@ -169,6 +175,8 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->cnt.outpkts++; s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); + + local_bh_enable(); } } @@ -179,6 +187,8 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) struct netns_ipvs *ipvs = svc->ipvs; struct ip_vs_cpu_stats *s; + local_bh_disable(); + s = this_cpu_ptr(cp->dest->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.conns++; @@ -193,6 +203,8 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) u64_stats_update_begin(&s->syncp); s->cnt.conns++; u64_stats_update_end(&s->syncp); + + local_bh_enable(); } -- cgit v1.2.3 From 25fd386e0bc065849db7400f579e82863ea44838 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 4 May 2018 18:16:06 +0200 Subject: netfilter: core: add missing __rcu annotation removes following sparse error: net/netfilter/core.c:598:30: warning: incorrect type in argument 1 (different address spaces) net/netfilter/core.c:598:30: expected struct nf_hook_entries **e net/netfilter/core.c:598:30: got struct nf_hook_entries [noderef] ** Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 0f6b8172fb9a..206fb2c4c319 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -585,7 +585,8 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif -static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max) +static void __net_init +__netfilter_net_init(struct nf_hook_entries __rcu **e, int max) { int h; -- cgit v1.2.3 From 4e09fc873d92398001e267f7b60c36c963f825b3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 6 May 2018 00:45:43 +0200 Subject: netfilter: prefer nla_strlcpy for dealing with NLA_STRING attributes fixes these warnings: 'nfnl_cthelper_create' at net/netfilter/nfnetlink_cthelper.c:237:2, 'nfnl_cthelper_new' at net/netfilter/nfnetlink_cthelper.c:450:9: ./include/linux/string.h:246:9: warning: '__builtin_strncpy' specified bound 16 equals destination size [-Wstringop-truncation] return __builtin_strncpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Moreover, strncpy assumes null-terminated source buffers, but thats not the case here. Unlike strlcpy, nla_strlcpy *does* pad the destination buffer while also considering nla attribute size. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_acct.c | 2 +- net/netfilter/nfnetlink_cthelper.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index b9505bcd3827..6ddf89183e7b 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -115,7 +115,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl, nfacct->flags = flags; } - strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); + nla_strlcpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); if (tb[NFACCT_BYTES]) { atomic64_set(&nfacct->bytes, diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 4a4b293fb2e5..fa026b269b36 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -149,8 +149,8 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) return -EINVAL; - strncpy(expect_policy->name, - nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN); + nla_strlcpy(expect_policy->name, + nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN); expect_policy->max_expected = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) @@ -234,7 +234,8 @@ nfnl_cthelper_create(const struct nlattr * const tb[], if (ret < 0) goto err1; - strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); + nla_strlcpy(helper->name, + nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); if (size > FIELD_SIZEOF(struct nf_conn_help, data)) { ret = -ENOMEM; -- cgit v1.2.3 From a44f6d82a471aa52fe218e43105fbe3c458fc5a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 6 May 2018 00:46:16 +0200 Subject: netfilter: x_tables: add module alias for icmp matches The icmp matches are implemented in ip_tables and ip6_tables, respectively, so for normal iptables they are always available: those modules are loaded once iptables calls getsockopt() to fetch available module revisions. In iptables-over-nftables case probing occurs via nfnetlink, so these modules might not be loaded. Add aliases so modprobe can load these when icmp/icmp6 is requested. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ip_tables.c | 1 + net/ipv6/netfilter/ip6_tables.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 44b308d93ec2..e85f35b89c49 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -34,6 +34,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("IPv4 packet filter"); +MODULE_ALIAS("ipt_icmp"); void *ipt_alloc_initial_table(const struct xt_table *info) { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 65c9e1a58305..97f79dc943d7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -38,6 +38,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("IPv6 packet filter"); +MODULE_ALIAS("ip6t_icmp6"); void *ip6t_alloc_initial_table(const struct xt_table *info) { -- cgit v1.2.3 From 009240940e84c1c089af88b454f7e804a4c5bd1b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 6 May 2018 00:47:20 +0200 Subject: netfilter: nf_tables: don't assume chain stats are set when jumplabel is set nft_chain_stats_replace() and all other spots assume ->stats can be NULL, but nft_update_chain_stats does not. It must do this check, just because the jump label is set doesn't mean all basechains have stats assigned. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index dfd0bf3810d2..942702a2776f 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -119,15 +119,22 @@ DEFINE_STATIC_KEY_FALSE(nft_counters_enabled); static noinline void nft_update_chain_stats(const struct nft_chain *chain, const struct nft_pktinfo *pkt) { + struct nft_base_chain *base_chain; struct nft_stats *stats; - local_bh_disable(); - stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats)); - u64_stats_update_begin(&stats->syncp); - stats->pkts++; - stats->bytes += pkt->skb->len; - u64_stats_update_end(&stats->syncp); - local_bh_enable(); + base_chain = nft_base_chain(chain); + if (!base_chain->stats) + return; + + stats = this_cpu_ptr(rcu_dereference(base_chain->stats)); + if (stats) { + local_bh_disable(); + u64_stats_update_begin(&stats->syncp); + stats->pkts++; + stats->bytes += pkt->skb->len; + u64_stats_update_end(&stats->syncp); + local_bh_enable(); + } } struct nft_jumpstack { -- cgit v1.2.3 From 8bdf164744b2c7f63561846c01cff3db597f282d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 7 May 2018 15:22:35 +0200 Subject: netfilter: nft_compat: prepare for indirect info storage Next patch will make it possible for *info to be stored in a separate allocation instead of the expr private area. This removes the 'expr priv area is info blob' assumption from the match init/destroy/eval functions. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 47 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 870d8c29dae9..dec0afb0ffe0 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -324,11 +324,11 @@ static int nft_target_validate(const struct nft_ctx *ctx, return 0; } -static void nft_match_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void __nft_match_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt, + void *info) { - void *info = nft_expr_priv(expr); struct xt_match *match = expr->ops->data; struct sk_buff *skb = pkt->skb; bool ret; @@ -352,6 +352,13 @@ static void nft_match_eval(const struct nft_expr *expr, } } +static void nft_match_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + __nft_match_eval(expr, regs, pkt, nft_expr_priv(expr)); +} + static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, [NFTA_MATCH_REV] = { .type = NLA_U32 }, @@ -412,10 +419,10 @@ static void match_compat_from_user(struct xt_match *m, void *in, void *out) } static int -nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +__nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[], + void *info) { - void *info = nft_expr_priv(expr); struct xt_match *match = expr->ops->data; struct xt_mtchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); @@ -444,11 +451,18 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return 0; } +static int +nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + return __nft_match_init(ctx, expr, tb, nft_expr_priv(expr)); +} + static void -nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +__nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, + void *info) { struct xt_match *match = expr->ops->data; - void *info = nft_expr_priv(expr); struct xt_mtdtor_param par; par.net = ctx->net; @@ -462,9 +476,15 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) module_put(match->me); } -static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +static void +nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); +} + +static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr, + void *info) { - void *info = nft_expr_priv(expr); struct xt_match *match = expr->ops->data; if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || @@ -478,6 +498,11 @@ nla_put_failure: return -1; } +static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + return __nft_match_dump(skb, expr, nft_expr_priv(expr)); +} + static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) -- cgit v1.2.3 From 732a8049f365f514d0607e03938491bf6cb0d620 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 7 May 2018 15:22:36 +0200 Subject: netfilter: nft_compat: fix handling of large matchinfo size currently matchinfo gets stored in the expression, but some xt matches are very large. To handle those we either need to switch nft core to kvmalloc and increase size limit, or allocate the info blob of large matches separately. This does the latter, this limits the scope of the changes to nft_compat. I picked a threshold of 192, this allows most matches to work as before and handle only few ones via separate alloation (cgroup, u32, sctp, rt). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 64 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index dec0afb0ffe0..1d99a1efdafc 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -36,6 +36,13 @@ struct nft_xt { struct rcu_head rcu_head; }; +/* Used for matches where *info is larger than X byte */ +#define NFT_MATCH_LARGE_THRESH 192 + +struct nft_xt_match_priv { + void *info; +}; + static bool nft_xt_put(struct nft_xt *xt) { if (--xt->refcnt == 0) { @@ -352,6 +359,15 @@ static void __nft_match_eval(const struct nft_expr *expr, } } +static void nft_match_large_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + + __nft_match_eval(expr, regs, pkt, priv->info); +} + static void nft_match_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -458,6 +474,24 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return __nft_match_init(ctx, expr, tb, nft_expr_priv(expr)); } +static int +nft_match_large_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + struct xt_match *m = expr->ops->data; + int ret; + + priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL); + if (!priv->info) + return -ENOMEM; + + ret = __nft_match_init(ctx, expr, tb, priv->info); + if (ret) + kfree(priv->info); + return ret; +} + static void __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, void *info) @@ -482,6 +516,15 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); } +static void +nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + + __nft_match_destroy(ctx, expr, priv->info); + kfree(priv->info); +} + static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr, void *info) { @@ -503,6 +546,13 @@ static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) return __nft_match_dump(skb, expr, nft_expr_priv(expr)); } +static int nft_match_large_dump(struct sk_buff *skb, const struct nft_expr *e) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(e); + + return __nft_match_dump(skb, e, priv->info); +} + static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) @@ -670,6 +720,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, { struct nft_xt *nft_match; struct xt_match *match; + unsigned int matchsize; char *mt_name; u32 rev, family; int err; @@ -709,7 +760,6 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->refcnt = 0; nft_match->ops.type = &nft_match_type; - nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; @@ -717,6 +767,18 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.validate = nft_match_validate; nft_match->ops.data = match; + matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); + if (matchsize > NFT_MATCH_LARGE_THRESH) { + matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv)); + + nft_match->ops.eval = nft_match_large_eval; + nft_match->ops.init = nft_match_large_init; + nft_match->ops.destroy = nft_match_large_destroy; + nft_match->ops.dump = nft_match_large_dump; + } + + nft_match->ops.size = matchsize; + list_add(&nft_match->head, &nft_match_list); return &nft_match->ops; -- cgit v1.2.3 From bb7b40aecbf778c0c83a5bd62b0f03ca9f49a618 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 8 May 2018 02:43:57 +0200 Subject: netfilter: nf_tables: bogus EBUSY in chain deletions When removing a rule that jumps to chain and such chain in the same batch, this bogusly hits EBUSY. Add activate and deactivate operations to expression that can be called from the preparation and the commit/abort phases. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 46 +++++++++++++++++++++++++++++++++++++++---- net/netfilter/nft_immediate.c | 15 +++++++++++--- 2 files changed, 54 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 785d7fcf1fe1..3806db31cbbf 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -214,6 +214,34 @@ static int nft_delchain(struct nft_ctx *ctx) return err; } +static void nft_rule_expr_activate(const struct nft_ctx *ctx, + struct nft_rule *rule) +{ + struct nft_expr *expr; + + expr = nft_expr_first(rule); + while (expr != nft_expr_last(rule) && expr->ops) { + if (expr->ops->activate) + expr->ops->activate(ctx, expr); + + expr = nft_expr_next(expr); + } +} + +static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, + struct nft_rule *rule) +{ + struct nft_expr *expr; + + expr = nft_expr_first(rule); + while (expr != nft_expr_last(rule) && expr->ops) { + if (expr->ops->deactivate) + expr->ops->deactivate(ctx, expr); + + expr = nft_expr_next(expr); + } +} + static int nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) { @@ -259,6 +287,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) nft_trans_destroy(trans); return err; } + nft_rule_expr_deactivate(ctx, rule); return 0; } @@ -2238,6 +2267,13 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } +static void nf_tables_rule_release(const struct nft_ctx *ctx, + struct nft_rule *rule) +{ + nft_rule_expr_deactivate(ctx, rule); + nf_tables_rule_destroy(ctx, rule); +} + #define NFT_RULE_MAXEXPRS 128 static struct nft_expr_info *info; @@ -2402,7 +2438,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return 0; err2: - nf_tables_rule_destroy(&ctx, rule); + nf_tables_rule_release(&ctx, rule); err1: for (i = 0; i < n; i++) { if (info[i].ops != NULL) @@ -4130,7 +4166,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, * NFT_GOTO verdicts. This function must be called on active data objects * from the second phase of the commit protocol. */ -static void nft_data_hold(const struct nft_data *data, enum nft_data_types type) +void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { @@ -6015,10 +6051,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWRULE: trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); + nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; nft_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: @@ -6594,7 +6632,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); ctx->chain->use--; - nf_tables_rule_destroy(ctx, rule); + nf_tables_rule_release(ctx, rule); } list_del(&ctx->chain->list); ctx->table->use--; @@ -6632,7 +6670,7 @@ static void __nft_release_tables(struct net *net) list_for_each_entry_safe(rule, nr, &chain->rules, list) { list_del(&rule->list); chain->use--; - nf_tables_rule_destroy(&ctx, rule); + nf_tables_rule_release(&ctx, rule); } } list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 4717d7796927..aa87ff8beae8 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -69,8 +69,16 @@ err1: return err; } -static void nft_immediate_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_immediate_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg)); +} + +static void nft_immediate_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); @@ -108,7 +116,8 @@ static const struct nft_expr_ops nft_imm_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), .eval = nft_immediate_eval, .init = nft_immediate_init, - .destroy = nft_immediate_destroy, + .activate = nft_immediate_activate, + .deactivate = nft_immediate_deactivate, .dump = nft_immediate_dump, .validate = nft_immediate_validate, }; -- cgit v1.2.3 From f0dfd7a2b35b02030949100247d851b793cb275f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 9 May 2018 13:22:56 +0100 Subject: netfilter: nf_tables: fix memory leak on error exit return Currently the -EBUSY error return path is not free'ing resources allocated earlier, leaving a memory leak. Fix this by exiting via the error exit label err5 that performs the necessary resource clean up. Detected by CoverityScan, CID#1432975 ("Resource leak") Fixes: 9744a6fcefcb ("netfilter: nf_tables: check if same extensions are set when adding elements") Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3806db31cbbf..91e80aa852d6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4080,8 +4080,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) || nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^ - nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) - return -EBUSY; + nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) { + err = -EBUSY; + goto err5; + } if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && memcmp(nft_set_ext_data(ext), -- cgit v1.2.3 From b84bbaf7a6c8cca24f8acf25a2c8e46913a947ba Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 11 May 2018 13:24:25 -0400 Subject: packet: in packet_snd start writing at link layer allocation Packet sockets allow construction of packets shorter than dev->hard_header_len to accommodate protocols with variable length link layer headers. These packets are padded to dev->hard_header_len, because some device drivers interpret that as a minimum packet size. packet_snd reserves dev->hard_header_len bytes on allocation. SOCK_DGRAM sockets call skb_push in dev_hard_header() to ensure that link layer headers are stored in the reserved range. SOCK_RAW sockets do the same in tpacket_snd, but not in packet_snd. Syzbot was able to send a zero byte packet to a device with massive 116B link layer header, causing padding to cross over into skb_shinfo. Fix this by writing from the start of the llheader reserved range also in the case of packet_snd/SOCK_RAW. Update skb_set_network_header to the new offset. This also corrects it for SOCK_DGRAM, where it incorrectly double counted reserve due to the skb_push in dev_hard_header. Fixes: 9ed988cd5915 ("packet: validate variable length ll headers") Reported-by: syzbot+71d74a5406d02057d559@syzkaller.appspotmail.com Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 01f3515cada0..e9422fe45179 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2903,13 +2903,15 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (skb == NULL) goto out_unlock; - skb_set_network_header(skb, reserve); + skb_reset_network_header(skb); err = -EINVAL; if (sock->type == SOCK_DGRAM) { offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (unlikely(offset < 0)) goto out_free; + } else if (reserve) { + skb_push(skb, reserve); } /* Returns -EFAULT on error */ -- cgit v1.2.3 From d49baa7e12ee70c0a7b821d088a770c94c02e494 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 May 2018 17:01:30 -0700 Subject: net/smc: check for missing nlattrs in SMC_PNETID messages It's possible to crash the kernel in several different ways by sending messages to the SMC_PNETID generic netlink family that are missing the expected attributes: - Missing SMC_PNETID_NAME => null pointer dereference when comparing names. - Missing SMC_PNETID_ETHNAME => null pointer dereference accessing smc_pnetentry::ndev. - Missing SMC_PNETID_IBNAME => null pointer dereference accessing smc_pnetentry::smcibdev. - Missing SMC_PNETID_IBPORT => out of bounds array access to smc_ib_device::pattr[-1]. Fix it by validating that all expected attributes are present and that SMC_PNETID_IBPORT is nonzero. Reported-by: syzbot+5cd61039dc9b8bfa6e47@syzkaller.appspotmail.com Fixes: 6812baabf24d ("smc: establish pnet table management") Cc: # v4.11+ Signed-off-by: Eric Biggers Signed-off-by: David S. Miller --- net/smc/smc_pnet.c | 71 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 74568cdbca70..d7b88b2d1b22 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -245,40 +245,45 @@ out: static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem, struct nlattr *tb[]) { - char *string, *ibname = NULL; - int rc = 0; + char *string, *ibname; + int rc; memset(pnetelem, 0, sizeof(*pnetelem)); INIT_LIST_HEAD(&pnetelem->list); - if (tb[SMC_PNETID_NAME]) { - string = (char *)nla_data(tb[SMC_PNETID_NAME]); - if (!smc_pnetid_valid(string, pnetelem->pnet_name)) { - rc = -EINVAL; - goto error; - } - } - if (tb[SMC_PNETID_ETHNAME]) { - string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); - pnetelem->ndev = dev_get_by_name(net, string); - if (!pnetelem->ndev) - return -ENOENT; - } - if (tb[SMC_PNETID_IBNAME]) { - ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); - ibname = strim(ibname); - pnetelem->smcibdev = smc_pnet_find_ib(ibname); - if (!pnetelem->smcibdev) { - rc = -ENOENT; - goto error; - } - } - if (tb[SMC_PNETID_IBPORT]) { - pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); - if (pnetelem->ib_port > SMC_MAX_PORTS) { - rc = -EINVAL; - goto error; - } - } + + rc = -EINVAL; + if (!tb[SMC_PNETID_NAME]) + goto error; + string = (char *)nla_data(tb[SMC_PNETID_NAME]); + if (!smc_pnetid_valid(string, pnetelem->pnet_name)) + goto error; + + rc = -EINVAL; + if (!tb[SMC_PNETID_ETHNAME]) + goto error; + rc = -ENOENT; + string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); + pnetelem->ndev = dev_get_by_name(net, string); + if (!pnetelem->ndev) + goto error; + + rc = -EINVAL; + if (!tb[SMC_PNETID_IBNAME]) + goto error; + rc = -ENOENT; + ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); + ibname = strim(ibname); + pnetelem->smcibdev = smc_pnet_find_ib(ibname); + if (!pnetelem->smcibdev) + goto error; + + rc = -EINVAL; + if (!tb[SMC_PNETID_IBPORT]) + goto error; + pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); + if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS) + goto error; + return 0; error: @@ -307,6 +312,8 @@ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) void *hdr; int rc; + if (!info->attrs[SMC_PNETID_NAME]) + return -EINVAL; pnetelem = smc_pnet_find_pnetid( (char *)nla_data(info->attrs[SMC_PNETID_NAME])); if (!pnetelem) @@ -359,6 +366,8 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) { + if (!info->attrs[SMC_PNETID_NAME]) + return -EINVAL; return smc_pnet_remove_by_pnetid( (char *)nla_data(info->attrs[SMC_PNETID_NAME])); } -- cgit v1.2.3 From 7f582b248d0a86bae5788c548d7bb5bca6f7691a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 May 2018 21:14:26 -0700 Subject: tcp: purge write queue in tcp_connect_init() syzkaller found a reliable way to crash the host, hitting a BUG() in __tcp_retransmit_skb() Malicous MSG_FASTOPEN is the root cause. We need to purge write queue in tcp_connect_init() at the point we init snd_una/write_seq. This patch also replaces the BUG() by a less intrusive WARN_ON_ONCE() kernel BUG at net/ipv4/tcp_output.c:2837! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 5276 Comm: syz-executor0 Not tainted 4.17.0-rc3+ #51 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__tcp_retransmit_skb+0x2992/0x2eb0 net/ipv4/tcp_output.c:2837 RSP: 0000:ffff8801dae06ff8 EFLAGS: 00010206 RAX: ffff8801b9fe61c0 RBX: 00000000ffc18a16 RCX: ffffffff864e1a49 RDX: 0000000000000100 RSI: ffffffff864e2e12 RDI: 0000000000000005 RBP: ffff8801dae073a0 R08: ffff8801b9fe61c0 R09: ffffed0039c40dd2 R10: ffffed0039c40dd2 R11: ffff8801ce206e93 R12: 00000000421eeaad R13: ffff8801ce206d4e R14: ffff8801ce206cc0 R15: ffff8801cd4f4a80 FS: 0000000000000000(0000) GS:ffff8801dae00000(0063) knlGS:00000000096bc900 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: 0000000020000000 CR3: 00000001c47b6000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_retransmit_skb+0x2e/0x250 net/ipv4/tcp_output.c:2923 tcp_retransmit_timer+0xc50/0x3060 net/ipv4/tcp_timer.c:488 tcp_write_timer_handler+0x339/0x960 net/ipv4/tcp_timer.c:573 tcp_write_timer+0x111/0x1d0 net/ipv4/tcp_timer.c:593 call_timer_fn+0x230/0x940 kernel/time/timer.c:1326 expire_timers kernel/time/timer.c:1363 [inline] __run_timers+0x79e/0xc50 kernel/time/timer.c:1666 run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285 invoke_softirq kernel/softirq.c:365 [inline] irq_exit+0x1d1/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:525 [inline] smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863 Fixes: cf60af03ca4e ("net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Reported-by: syzbot Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 383cac0ff0ec..d07e34f8e309 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2833,8 +2833,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) return -EBUSY; if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { - if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) - BUG(); + if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { + WARN_ON_ONCE(1); + return -EINVAL; + } if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) return -ENOMEM; } @@ -3342,6 +3344,7 @@ static void tcp_connect_init(struct sock *sk) sock_reset_flag(sk, SOCK_DONE); tp->snd_wnd = 0; tcp_init_wl(tp, 0); + tcp_write_queue_purge(sk); tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; tp->snd_up = tp->write_seq; -- cgit v1.2.3 From 5a4931ae0193f8a4a97e8260fd0df1d705d83299 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 16 May 2018 12:54:29 +0200 Subject: net/sched: fix refcnt leak in the error path of tcf_vlan_init() Similarly to what was done with commit a52956dfc503 ("net sched actions: fix refcnt leak in skbmod"), fix the error path of tcf_vlan_init() to avoid refcnt leaks when wrong value of TCA_VLAN_PUSH_VLAN_PROTOCOL is given. Fixes: 5026c9b1bafc ("net sched: vlan action fix late binding") CC: Roman Mashak Signed-off-by: Davide Caratti Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_vlan.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 853604685965..1fb39e1f9d07 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -161,6 +161,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, case htons(ETH_P_8021AD): break; default: + if (exists) + tcf_idr_release(*a, bind); return -EPROTONOSUPPORT; } } else { -- cgit v1.2.3 From 8ab6ffba14a466c7298cb3fd5066d774d2977ad1 Mon Sep 17 00:00:00 2001 From: Matt Mullins Date: Wed, 16 May 2018 10:48:40 -0700 Subject: tls: don't use stack memory in a scatterlist scatterlist code expects virt_to_page() to work, which fails with CONFIG_VMAP_STACK=y. Fixes: c46234ebb4d1e ("tls: RX path for ktls") Signed-off-by: Matt Mullins Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 71e79597f940..e1c93ce74e0f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -680,7 +680,6 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgin = &sgin_arr[0]; struct strp_msg *rxm = strp_msg(skb); int ret, nsg = ARRAY_SIZE(sgin_arr); - char aad_recv[TLS_AAD_SPACE_SIZE]; struct sk_buff *unused; ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, @@ -698,13 +697,13 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb, } sg_init_table(sgin, nsg); - sg_set_buf(&sgin[0], aad_recv, sizeof(aad_recv)); + sg_set_buf(&sgin[0], ctx->rx_aad_ciphertext, TLS_AAD_SPACE_SIZE); nsg = skb_to_sgvec(skb, &sgin[1], rxm->offset + tls_ctx->rx.prepend_size, rxm->full_len - tls_ctx->rx.prepend_size); - tls_make_aad(aad_recv, + tls_make_aad(ctx->rx_aad_ciphertext, rxm->full_len - tls_ctx->rx.overhead_size, tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size, @@ -803,12 +802,12 @@ int tls_sw_recvmsg(struct sock *sk, if (to_copy <= len && page_count < MAX_SKB_FRAGS && likely(!(flags & MSG_PEEK))) { struct scatterlist sgin[MAX_SKB_FRAGS + 1]; - char unused[21]; int pages = 0; zc = true; sg_init_table(sgin, MAX_SKB_FRAGS + 1); - sg_set_buf(&sgin[0], unused, 13); + sg_set_buf(&sgin[0], ctx->rx_aad_plaintext, + TLS_AAD_SPACE_SIZE); err = zerocopy_from_iter(sk, &msg->msg_iter, to_copy, &pages, -- cgit v1.2.3 From 5a847a6e1477be5bd3f94cc1b7708d7d4a7cd94c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 16 May 2018 13:36:40 -0700 Subject: net/ipv4: Initialize proto and ports in flow struct Updating the FIB tracepoint for the recent change to allow rules using the protocol and ports exposed a few places where the entries in the flow struct are not initialized. For __fib_validate_source add the call to fib4_rules_early_flow_dissect since it is invoked for the input path. For netfilter, add the memset on the flow struct to avoid future problems like this. In ip_route_input_slow need to set the fields if the skb dissection does not happen. Fixes: bfff4862653b ("net: fib_rules: support for match on ip_proto, sport and dport") Signed-off-by: David Ahern Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 8 +++++++- net/ipv4/netfilter/ipt_rpfilter.c | 2 +- net/ipv4/route.c | 7 ++++++- 3 files changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f05afaf3235c..4d622112bf95 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -326,10 +326,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, int rpf, struct in_device *idev, u32 *itag) { + struct net *net = dev_net(dev); + struct flow_keys flkeys; int ret, no_addr; struct fib_result res; struct flowi4 fl4; - struct net *net = dev_net(dev); bool dev_match; fl4.flowi4_oif = 0; @@ -347,6 +348,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, no_addr = idev->ifa_list == NULL; fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; + if (!fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys)) { + fl4.flowi4_proto = 0; + fl4.fl4_sport = 0; + fl4.fl4_dport = 0; + } trace_fib_validate_source(dev, &fl4); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index fd01f13c896a..12843c9ef142 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -89,10 +89,10 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) return true ^ invert; } + memset(&flow, 0, sizeof(flow)); flow.flowi4_iif = LOOPBACK_IFINDEX; flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); - flow.flowi4_oif = 0; flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; flow.flowi4_tos = RT_TOS(iph->tos); flow.flowi4_scope = RT_SCOPE_UNIVERSE; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 29268efad247..2cfa1b518f8d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1961,8 +1961,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); - if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) + if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { flkeys = &_flkeys; + } else { + fl4.flowi4_proto = 0; + fl4.fl4_sport = 0; + fl4.fl4_dport = 0; + } err = fib_lookup(net, &fl4, res, 0); if (err != 0) { -- cgit v1.2.3 From 02f99df1875c11330cd0be69a40fa8ccd14749b2 Mon Sep 17 00:00:00 2001 From: William Tu Date: Wed, 16 May 2018 17:24:32 -0700 Subject: erspan: fix invalid erspan version. ERSPAN only support version 1 and 2. When packets send to an erspan device which does not have proper version number set, drop the packet. In real case, we observe multicast packets sent to the erspan pernet device, erspan0, which does not have erspan version configured. Reported-by: Greg Rose Signed-off-by: William Tu Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 4 +++- net/ipv6/ip6_gre.c | 5 ++++- 2 files changed, 7 ins