From 277d916fc2e959c3f106904116bb4f7b1148d47a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 16 Dec 2013 21:39:50 +0100 Subject: mac80211: move "bufferable MMPDU" check to fix AP mode scan The check needs to apply to both multicast and unicast packets, otherwise probe requests on AP mode scans are sent through the multicast buffer queue, which adds long delays (often longer than the scanning interval). Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c558b246ef00..ca7fa7f0613d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -463,7 +463,6 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) { struct sta_info *sta = tx->sta; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; struct ieee80211_local *local = tx->local; if (unlikely(!sta)) @@ -474,15 +473,6 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) !(info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER))) { int ac = skb_get_queue_mapping(tx->skb); - /* only deauth, disassoc and action are bufferable MMPDUs */ - if (ieee80211_is_mgmt(hdr->frame_control) && - !ieee80211_is_deauth(hdr->frame_control) && - !ieee80211_is_disassoc(hdr->frame_control) && - !ieee80211_is_action(hdr->frame_control)) { - info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; - return TX_CONTINUE; - } - ps_dbg(sta->sdata, "STA %pM aid %d: PS buffer for AC %d\n", sta->sta.addr, sta->sta.aid, ac); if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) @@ -525,9 +515,22 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED)) return TX_CONTINUE; + /* only deauth, disassoc and action are bufferable MMPDUs */ + if (ieee80211_is_mgmt(hdr->frame_control) && + !ieee80211_is_deauth(hdr->frame_control) && + !ieee80211_is_disassoc(hdr->frame_control) && + !ieee80211_is_action(hdr->frame_control)) { + if (tx->flags & IEEE80211_TX_UNICAST) + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + return TX_CONTINUE; + } + if (tx->flags & IEEE80211_TX_UNICAST) return ieee80211_tx_h_unicast_ps_buf(tx); else -- cgit v1.2.3 From d31652a26bc9e752a99b6ac3b61353129934e451 Mon Sep 17 00:00:00 2001 From: Arron Wang Date: Thu, 14 Nov 2013 17:03:41 +0800 Subject: NFC: Fix target mode p2p link establishment With commit e29a9e2ae165620d, we set the active_target pointer from nfc_dep_link_is_up() in order to support the case where the target detection and the DEP link setting are done atomically by the driver. That can only happen in initiator mode, so we need to check for that otherwise we fail to bring a p2p link in target mode. Signed-off-by: Arron Wang Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index 872529105abc..83b9927e7d19 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -384,7 +384,7 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, { dev->dep_link_up = true; - if (!dev->active_target) { + if (!dev->active_target && rf_mode == NFC_RF_INITIATOR) { struct nfc_target *target; target = nfc_find_target(dev, target_idx); -- cgit v1.2.3 From 2690d97ade05c5325cbf7c72b94b90d265659886 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 31 Dec 2013 16:28:39 +0100 Subject: netfilter: nf_nat: fix access to uninitialized buffer in IRC NAT helper Commit 5901b6be885e attempted to introduce IPv6 support into IRC NAT helper. By doing so, the following code seemed to be removed by accident: ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip); sprintf(buffer, "%u %u", ip, port); pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n", buffer, &ip, port); This leads to the fact that buffer[] was left uninitialized and contained some stack value. When we call nf_nat_mangle_tcp_packet(), we call strlen(buffer) on excatly this uninitialized buffer. If we are unlucky and the skb has enough tailroom, we overwrite resp. leak contents with values that sit on our stack into the packet and send that out to the receiver. Since the rather informal DCC spec [1] does not seem to specify IPv6 support right now, we log such occurences so that admins can act accordingly, and drop the packet. I've looked into XChat source, and IPv6 is not supported there: addresses are in u32 and print via %u format string. Therefore, restore old behaviour as in IPv4, use snprintf(). The IRC helper does not support IPv6 by now. By this, we can safely use strlen(buffer) in nf_nat_mangle_tcp_packet() and prevent a buffer overflow. Also simplify some code as we now have ct variable anyway. [1] http://www.irchelp.org/irchelp/rfc/ctcpspec.html Fixes: 5901b6be885e ("netfilter: nf_nat: support IPv6 in IRC NAT helper") Signed-off-by: Daniel Borkmann Cc: Harald Welte Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_irc.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index f02b3605823e..1fb2258c3535 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -34,10 +34,14 @@ static unsigned int help(struct sk_buff *skb, struct nf_conntrack_expect *exp) { char buffer[sizeof("4294967296 65635")]; + struct nf_conn *ct = exp->master; + union nf_inet_addr newaddr; u_int16_t port; unsigned int ret; /* Reply comes from server. */ + newaddr = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3; + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; exp->dir = IP_CT_DIR_REPLY; exp->expectfn = nf_nat_follow_master; @@ -57,17 +61,35 @@ static unsigned int help(struct sk_buff *skb, } if (port == 0) { - nf_ct_helper_log(skb, exp->master, "all ports in use"); + nf_ct_helper_log(skb, ct, "all ports in use"); return NF_DROP; } - ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, - protoff, matchoff, matchlen, buffer, - strlen(buffer)); + /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27 + * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28 + * strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26 + * strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26 + * strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27 + * + * AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits, + * 255.255.255.255==4294967296, 10 digits) + * P: bound port (min 1 d, max 5d (65635)) + * F: filename (min 1 d ) + * S: size (min 1 d ) + * 0x01, \n: terminators + */ + /* AAA = "us", ie. where server normally talks to. */ + snprintf(buffer, sizeof(buffer), "%u %u", ntohl(newaddr.ip), port); + pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n", + buffer, &newaddr.ip, port); + + ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff, + matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) { - nf_ct_helper_log(skb, exp->master, "cannot mangle packet"); + nf_ct_helper_log(skb, ct, "cannot mangle packet"); nf_ct_unexpect_related(exp); } + return ret; } -- cgit v1.2.3 From f2661adc0c134d890d84c32d7cb54a2b4d1f0a5f Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Sat, 4 Jan 2014 14:10:43 +0100 Subject: netfilter: only warn once on wrong seqadj usage Avoid potentially spamming the kernel log with WARN splash messages when catching wrong usage of seqadj, by simply using WARN_ONCE. This is a followup to commit db12cf274353 (netfilter: WARN about wrong usage of sequence number adjustments) Suggested-by: Flavio Leitner Suggested-by: Daniel Borkmann Suggested-by: Florian Westphal Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_seqadj.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index b2d38da67822..f6e2ae91a80b 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -37,7 +37,7 @@ int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, return 0; if (unlikely(!seqadj)) { - WARN(1, "Wrong seqadj usage, missing nfct_seqadj_ext_add()\n"); + WARN_ONCE(1, "Missing nfct_seqadj_ext_add() setup call\n"); return 0; } -- cgit v1.2.3 From 657e5d19657542631461e72fdc375b1e83e72070 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 7 Jan 2014 15:39:43 +0800 Subject: ipv6: pcpu_tstats.syncp should be initialised in ip6_vti.c initialise pcpu_tstats.syncp to kill the calltrace [ 11.973950] Call Trace: [ 11.973950] [<819bbaff>] dump_stack+0x48/0x60 [ 11.973950] [<819bbaff>] dump_stack+0x48/0x60 [ 11.973950] [<81078dcf>] __lock_acquire.isra.22+0x1bf/0xc10 [ 11.973950] [<81078dcf>] __lock_acquire.isra.22+0x1bf/0xc10 [ 11.973950] [<81079fa7>] lock_acquire+0x77/0xa0 [ 11.973950] [<81079fa7>] lock_acquire+0x77/0xa0 [ 11.973950] [<817ca7ab>] ? dev_get_stats+0xcb/0x130 [ 11.973950] [<817ca7ab>] ? dev_get_stats+0xcb/0x130 [ 11.973950] [<8183862d>] ip_tunnel_get_stats64+0x6d/0x230 [ 11.973950] [<8183862d>] ip_tunnel_get_stats64+0x6d/0x230 [ 11.973950] [<817ca7ab>] ? dev_get_stats+0xcb/0x130 [ 11.973950] [<817ca7ab>] ? dev_get_stats+0xcb/0x130 [ 11.973950] [<811cf8c1>] ? __nla_reserve+0x21/0xd0 [ 11.973950] [<811cf8c1>] ? __nla_reserve+0x21/0xd0 [ 11.973950] [<817ca7ab>] dev_get_stats+0xcb/0x130 [ 11.973950] [<817ca7ab>] dev_get_stats+0xcb/0x130 [ 11.973950] [<817d5409>] rtnl_fill_ifinfo+0x569/0xe20 [ 11.973950] [<817d5409>] rtnl_fill_ifinfo+0x569/0xe20 [ 11.973950] [<810352e0>] ? kvm_clock_read+0x20/0x30 [ 11.973950] [<810352e0>] ? kvm_clock_read+0x20/0x30 [ 11.973950] [<81008e38>] ? sched_clock+0x8/0x10 [ 11.973950] [<81008e38>] ? sched_clock+0x8/0x10 [ 11.973950] [<8106ba45>] ? sched_clock_local+0x25/0x170 [ 11.973950] [<8106ba45>] ? sched_clock_local+0x25/0x170 [ 11.973950] [<810da6bd>] ? __kmalloc+0x3d/0x90 [ 11.973950] [<810da6bd>] ? __kmalloc+0x3d/0x90 [ 11.973950] [<817b8c10>] ? __kmalloc_reserve.isra.41+0x20/0x70 [ 11.973950] [<817b8c10>] ? __kmalloc_reserve.isra.41+0x20/0x70 [ 11.973950] [<810da81a>] ? slob_alloc_node+0x2a/0x60 [ 11.973950] [<810da81a>] ? slob_alloc_node+0x2a/0x60 [ 11.973950] [<817b919a>] ? __alloc_skb+0x6a/0x2b0 [ 11.973950] [<817b919a>] ? __alloc_skb+0x6a/0x2b0 [ 11.973950] [<817d8795>] rtmsg_ifinfo+0x65/0xe0 [ 11.973950] [<817d8795>] rtmsg_ifinfo+0x65/0xe0 [ 11.973950] [<817cbd31>] register_netdevice+0x531/0x5a0 [ 11.973950] [<817cbd31>] register_netdevice+0x531/0x5a0 [ 11.973950] [<81892b87>] ? ip6_tnl_get_cap+0x27/0x90 [ 11.973950] [<81892b87>] ? ip6_tnl_get_cap+0x27/0x90 [ 11.973950] [<817cbdb6>] register_netdev+0x16/0x30 [ 11.973950] [<817cbdb6>] register_netdev+0x16/0x30 [ 11.973950] [<81f574a6>] vti6_init_net+0x1c4/0x1d4 [ 11.973950] [<81f574a6>] vti6_init_net+0x1c4/0x1d4 [ 11.973950] [<81f573af>] ? vti6_init_net+0xcd/0x1d4 [ 11.973950] [<81f573af>] ? vti6_init_net+0xcd/0x1d4 [ 11.973950] [<817c16df>] ops_init.constprop.11+0x17f/0x1c0 [ 11.973950] [<817c16df>] ops_init.constprop.11+0x17f/0x1c0 [ 11.973950] [<817c1779>] register_pernet_operations.isra.9+0x59/0x90 [ 11.973950] [<817c1779>] register_pernet_operations.isra.9+0x59/0x90 [ 11.973950] [<817c18d1>] register_pernet_device+0x21/0x60 [ 11.973950] [<817c18d1>] register_pernet_device+0x21/0x60 [ 11.973950] [<81f574b6>] ? vti6_init_net+0x1d4/0x1d4 [ 11.973950] [<81f574b6>] ? vti6_init_net+0x1d4/0x1d4 [ 11.973950] [<81f574c7>] vti6_tunnel_init+0x11/0x68 [ 11.973950] [<81f574c7>] vti6_tunnel_init+0x11/0x68 [ 11.973950] [<81f572a1>] ? mip6_init+0x73/0xb4 [ 11.973950] [<81f572a1>] ? mip6_init+0x73/0xb4 [ 11.973950] [<81f0cba4>] do_one_initcall+0xbb/0x15b [ 11.973950] [<81f0cba4>] do_one_initcall+0xbb/0x15b [ 11.973950] [<811a00d8>] ? sha_transform+0x528/0x1150 [ 11.973950] [<811a00d8>] ? sha_transform+0x528/0x1150 [ 11.973950] [<81f0c544>] ? repair_env_string+0x12/0x51 [ 11.973950] [<81f0c544>] ? repair_env_string+0x12/0x51 [ 11.973950] [<8105c30d>] ? parse_args+0x2ad/0x440 [ 11.973950] [<8105c30d>] ? parse_args+0x2ad/0x440 [ 11.973950] [<810546be>] ? __usermodehelper_set_disable_depth+0x3e/0x50 [ 11.973950] [<810546be>] ? __usermodehelper_set_disable_depth+0x3e/0x50 [ 11.973950] [<81f0cd27>] kernel_init_freeable+0xe3/0x182 [ 11.973950] [<81f0cd27>] kernel_init_freeable+0xe3/0x182 [ 11.973950] [<81f0c532>] ? do_early_param+0x7a/0x7a [ 11.973950] [<81f0c532>] ? do_early_param+0x7a/0x7a [ 11.973950] [<819b5b1b>] kernel_init+0xb/0x100 [ 11.973950] [<819b5b1b>] kernel_init+0xb/0x100 [ 11.973950] [<819cebf7>] ret_from_kernel_thread+0x1b/0x28 [ 11.973950] [<819cebf7>] ret_from_kernel_thread+0x1b/0x28 [ 11.973950] [<819b5b10>] ? rest_init+0xc0/0xc0 [ 11.973950] [<819b5b10>] ? rest_init+0xc0/0xc0 Before 469bdcefdc ("ipv6: fix the use of pcpu_tstats in ip6_vti.c"), the pcpu_tstats.syncp is not used to pretect the 64bit elements of pcpu_tstats, so not appear this calltrace. Reported-by: Fengguang Wu Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index a4564b05c47b..7b42d5ef868d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -732,12 +732,18 @@ static void vti6_dev_setup(struct net_device *dev) static inline int vti6_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + int i; t->dev = dev; t->net = dev_net(dev); dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *stats; + stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&stats->syncp); + } return 0; } -- cgit v1.2.3 From 732256b9335f8456623bb772d86c2a24e3cafca2 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Tue, 7 Jan 2014 15:51:36 -0500 Subject: tipc: correctly unlink packets from deferred packet queue When we pull a received packet from a link's 'deferred packets' queue for processing, its 'next' pointer is not cleared, and still refers to the next packet in that queue, if any. This is incorrect, but caused no harm before commit 40ba3cdf542a469aaa9083fa041656e59b109b90 ("tipc: message reassembly using fragment chain") was introduced. After that commit, it may sometimes lead to the following oops: general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC Modules linked in: tipc CPU: 4 PID: 0 Comm: swapper/4 Tainted: G W 3.13.0-rc2+ #6 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 task: ffff880017af4880 ti: ffff880017aee000 task.ti: ffff880017aee000 RIP: 0010:[] [] skb_try_coalesce+0x44/0x3d0 RSP: 0018:ffff880016603a78 EFLAGS: 00010212 RAX: 6b6b6b6bd6d6d6d6 RBX: ffff880013106ac0 RCX: ffff880016603ad0 RDX: ffff880016603ad7 RSI: ffff88001223ed00 RDI: ffff880013106ac0 RBP: ffff880016603ab8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff88001223ed00 R13: ffff880016603ad0 R14: 000000000000058c R15: ffff880012297650 FS: 0000000000000000(0000) GS:ffff880016600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 000000000805b000 CR3: 0000000011f5d000 CR4: 00000000000006e0 Stack: ffff880016603a88 ffffffff810a38ed ffff880016603aa8 ffff88001223ed00 0000000000000001 ffff880012297648 ffff880016603b68 ffff880012297650 ffff880016603b08 ffffffffa0006c51 ffff880016603b08 00ffffffa00005fc Call Trace: [] ? trace_hardirqs_on+0xd/0x10 [] tipc_link_recv_fragment+0xd1/0x1b0 [tipc] [] tipc_recv_msg+0x4e4/0x920 [tipc] [] ? tipc_l2_rcv_msg+0x40/0x250 [tipc] [] tipc_l2_rcv_msg+0xcc/0x250 [tipc] [] ? tipc_l2_rcv_msg+0x40/0x250 [tipc] [] __netif_receive_skb_core+0x80b/0xd00 [] ? __netif_receive_skb_core+0x144/0xd00 [] __netif_receive_skb+0x26/0x70 [] netif_receive_skb+0x2d/0x200 [] napi_gro_receive+0xb0/0x130 [] e1000_clean_rx_irq+0x2c2/0x530 [] e1000_clean+0x266/0x9c0 [] ? notifier_call_chain+0x2b/0x160 [] net_rx_action+0x141/0x310 [] __do_softirq+0xeb/0x480 [] ? _raw_spin_unlock+0x2b/0x40 [] ? handle_fasteoi_irq+0x72/0x100 [] irq_exit+0x96/0xc0 [] do_IRQ+0x63/0xe0 [] common_interrupt+0x6f/0x6f This happens when the last fragment of a message has passed through the the receiving link's 'deferred packets' queue, and at least one other packet was added to that queue while it was there. After the fragment chain with the complete message has been successfully delivered to the receiving socket, it is released. Since 'next' pointer of the last fragment in the released chain now is non-NULL, we get the crash shown above. We fix this by clearing the 'next' pointer of all received packets, including those being pulled from the 'deferred' queue, before they undergo any further processing. Fixes: 40ba3cdf542a4 ("tipc: message reassembly using fragment chain") Signed-off-by: Erik Hugne Reported-by: Ying Xue Reviewed-by: Paul Gortmaker Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 69cd9bf3f561..13b987745820 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1498,6 +1498,7 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) int type; head = head->next; + buf->next = NULL; /* Ensure bearer is still enabled */ if (unlikely(!b_ptr->active)) -- cgit v1.2.3 From 07edd741c838e376933b445bbf2692f83b6774e6 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 8 Jan 2014 15:43:22 +0100 Subject: ipv6: add link-local, sit and loopback address with INFINITY_LIFE_TIME MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past the IFA_PERMANENT flag indicated, that the valid and preferred lifetime where ignored. Since change fad8da3e085ddf ("ipv6 addrconf: fix preferred lifetime state-changing behavior while valid_lft is infinity") we honour at least the preferred lifetime on those addresses. As such the valid lifetime gets recalculated and updated to 0. If loopback address is added manually this problem does not occur. Also if NetworkManager manages IPv6, those addresses will get added via inet6_rtm_newaddr and thus will have a correct lifetime, too. Reported-by: François-Xavier Le Bail Reported-by: Damien Wyart Fixes: fad8da3e085ddf ("ipv6 addrconf: fix preferred lifetime state-changing behavior while valid_lft is infinity") Cc: Yasushi Asano Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f62c72b59f8e..abe46a4228ce 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2509,7 +2509,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, struct inet6_ifaddr *ifp; ifp = ipv6_add_addr(idev, addr, NULL, plen, - scope, IFA_F_PERMANENT, 0, 0); + scope, IFA_F_PERMANENT, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; @@ -2637,7 +2638,8 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr #endif - ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 0, 0); + ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); addrconf_dad_start(ifp); -- cgit v1.2.3 From f663dd9aaf9ed124f25f0f8452edf238f087ad50 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 10 Jan 2014 16:18:26 +0800 Subject: net: core: explicitly select a txq before doing l2 forwarding Currently, the tx queue were selected implicitly in ndo_dfwd_start_xmit(). The will cause several issues: - NETIF_F_LLTX were removed for macvlan, so txq lock were done for macvlan instead of lower device which misses the necessary txq synchronization for lower device such as txq stopping or frozen required by dev watchdog or control path. - dev_hard_start_xmit() was called with NULL txq which bypasses the net device watchdog. - dev_hard_start_xmit() does not check txq everywhere which will lead a crash when tso is disabled for lower device. Fix this by explicitly introducing a new param for .ndo_select_queue() for just selecting queues in the case of l2 forwarding offload. netdev_pick_tx() was also extended to accept this parameter and dev_queue_xmit_accel() was used to do l2 forwarding transmission. With this fixes, NETIF_F_LLTX could be preserved for macvlan and there's no need to check txq against NULL in dev_hard_start_xmit(). Also there's no need to keep a dedicated ndo_dfwd_start_xmit() and we can just reuse the code of dev_queue_xmit() to do the transmission. In the future, it was also required for macvtap l2 forwarding support since it provides a necessary synchronization method. Cc: John Fastabend Cc: Neil Horman Cc: e1000-devel@lists.sourceforge.net Signed-off-by: Jason Wang Acked-by: Neil Horman Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/core/dev.c | 29 +++++++++++++++++------------ net/core/flow_dissector.c | 10 +++++++--- net/core/netpoll.c | 2 +- net/mac80211/iface.c | 6 ++++-- net/sched/sch_generic.c | 2 +- 5 files changed, 30 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 4fc17221545d..0ce469e5ec80 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2539,7 +2539,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb, } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq, void *accel_priv) + struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; int rc = NETDEV_TX_OK; @@ -2605,13 +2605,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, dev_queue_xmit_nit(skb, dev); skb_len = skb->len; - if (accel_priv) - rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv); - else rc = ops->ndo_start_xmit(skb, dev); trace_net_dev_xmit(skb, rc, dev, skb_len); - if (rc == NETDEV_TX_OK && txq) + if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; } @@ -2627,10 +2624,7 @@ gso: dev_queue_xmit_nit(nskb, dev); skb_len = nskb->len; - if (accel_priv) - rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv); - else - rc = ops->ndo_start_xmit(nskb, dev); + rc = ops->ndo_start_xmit(nskb, dev); trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) @@ -2811,7 +2805,7 @@ EXPORT_SYMBOL(dev_loopback_xmit); * the BH enable code must have IRQs enabled so that it will not deadlock. * --BLG */ -int dev_queue_xmit(struct sk_buff *skb) +int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) { struct net_device *dev = skb->dev; struct netdev_queue *txq; @@ -2827,7 +2821,7 @@ int dev_queue_xmit(struct sk_buff *skb) skb_update_prio(skb); - txq = netdev_pick_tx(dev, skb); + txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); #ifdef CONFIG_NET_CLS_ACT @@ -2863,7 +2857,7 @@ int dev_queue_xmit(struct sk_buff *skb) if (!netif_xmit_stopped(txq)) { __this_cpu_inc(xmit_recursion); - rc = dev_hard_start_xmit(skb, dev, txq, NULL); + rc = dev_hard_start_xmit(skb, dev, txq); __this_cpu_dec(xmit_recursion); if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); @@ -2892,8 +2886,19 @@ out: rcu_read_unlock_bh(); return rc; } + +int dev_queue_xmit(struct sk_buff *skb) +{ + return __dev_queue_xmit(skb, NULL); +} EXPORT_SYMBOL(dev_queue_xmit); +int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) +{ + return __dev_queue_xmit(skb, accel_priv); +} +EXPORT_SYMBOL(dev_queue_xmit_accel); + /*======================================================================= Receiver routines diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d6ef17322500..2fc5beaf5783 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -395,17 +395,21 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) EXPORT_SYMBOL(__netdev_pick_tx); struct netdev_queue *netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb) + struct sk_buff *skb, + void *accel_priv) { int queue_index = 0; if (dev->real_num_tx_queues != 1) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) - queue_index = ops->ndo_select_queue(dev, skb); + queue_index = ops->ndo_select_queue(dev, skb, + accel_priv); else queue_index = __netdev_pick_tx(dev, skb); - queue_index = dev_cap_txqueue(dev, queue_index); + + if (!accel_priv) + queue_index = dev_cap_txqueue(dev, queue_index); } skb_set_queue_mapping(skb, queue_index); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 303097874633..19fe9c717ced 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -375,7 +375,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { struct netdev_queue *txq; - txq = netdev_pick_tx(dev, skb); + txq = netdev_pick_tx(dev, skb, NULL); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 36c3a4cbcabf..a0757913046e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1061,7 +1061,8 @@ static void ieee80211_uninit(struct net_device *dev) } static u16 ieee80211_netdev_select_queue(struct net_device *dev, - struct sk_buff *skb) + struct sk_buff *skb, + void *accel_priv) { return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb); } @@ -1078,7 +1079,8 @@ static const struct net_device_ops ieee80211_dataif_ops = { }; static u16 ieee80211_monitor_select_queue(struct net_device *dev, - struct sk_buff *skb) + struct sk_buff *skb, + void *accel_priv) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 922a09406ba7..7fc899a943a8 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -126,7 +126,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) - ret = dev_hard_start_xmit(skb, dev, txq, NULL); + ret = dev_hard_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); -- cgit v1.2.3 From 70315d22d3c7383f9a508d0aab21e2eb35b2303a Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 10 Jan 2014 15:34:45 -0500 Subject: inet_diag: fix inet_diag_dump_icsk() to use correct state for timewait sockets Fix inet_diag_dump_icsk() to reflect the fact that both TCP_TIME_WAIT and TCP_FIN_WAIT2 connections are represented by inet_timewait_sock (not just TIME_WAIT), and for such sockets the tw_substate field holds the real state, which can be either TCP_TIME_WAIT or TCP_FIN_WAIT2. This brings the inet_diag state-matching code in line with the field it uses to populate idiag_state. This is also analogous to the info exported in /proc/net/tcp, where get_tcp4_sock() exports sk->sk_state and get_timewait4_sock() exports tw->tw_substate. Before fixing this, (a) neither "ss -nemoi" nor "ss -nemoi state fin-wait-2" would return a socket in TCP_FIN_WAIT2; and (b) "ss -nemoi state time-wait" would also return sockets in state TCP_FIN_WAIT2. This is an old bug that predates 05dbc7b ("tcp/dccp: remove twchain"). Signed-off-by: Neal Cardwell Cc: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index a0f52dac8940..e34dccbc4d70 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -930,12 +930,15 @@ skip_listen_ht: spin_lock_bh(lock); sk_nulls_for_each(sk, node, &head->chain) { int res; + int state; if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) goto next_normal; - if (!(r->idiag_states & (1 << sk->sk_state))) + state = (sk->sk_state == TCP_TIME_WAIT) ? + inet_twsk(sk)->tw_substate : sk->sk_state; + if (!(r->idiag_states & (1 << state))) goto next_normal; if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) -- cgit v1.2.3