summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-10-01 12:58:48 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-10-01 12:58:48 -0700
commitc31eeaced22ce8bd61268a3c595d542bb38c0a4f (patch)
tree630f25b34aa1182b0a5ac1c5f60b9fa1acd29614 /net
parent0b936842c86392dad2c880539e824881e5d8ba77 (diff)
parent0eab5eb7a3a9a6ccfcdecbffff00d60a86a004bb (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking changes from David Miller: 1) Multiply in netfilter IPVS can overflow when calculating destination weight. From Simon Kirby. 2) Use after free fixes in IPVS from Julian Anastasov. 3) SFC driver bug fixes from Daniel Pieczko. 4) Memory leak in pcan_usb_core failure paths, from Alexey Khoroshilov. 5) Locking and encapsulation fixes to serial line CAN driver, from Andrew Naujoks. 6) Duplex and VF handling fixes to bnx2x driver from Yaniv Rosner, Eilon Greenstein, and Ariel Elior. 7) In lapb, if no other packets are outstanding, T1 timeouts actually stall things and no packet gets sent. Fix from Josselin Costanzi. 8) ICMP redirects should not make it to the socket error queues, from Duan Jiong. 9) Fix bugs in skge DMA mapping error handling, from Nikulas Patocka. 10) Fix setting of VLAN priority field on via-rhine driver, from Roget Luethi. 11) Fix TX stalls and VLAN promisc programming in be2net driver from Ajit Khaparde. 12) Packet padding doesn't get handled correctly in new usbnet SG support code, from Ming Lei. 13) Fix races in netdevice teardown wrt. network namespace closing. From Eric W. Biederman. 14) Fix potential missed initialization of net_secret if not TCP connections are openned. From Eric Dumazet. 15) Cinterion PLXX product ID in qmi_wwan driver is wrong, from Aleksander Morgado. 16) skb_cow_head() can change skb->data and thus packet header pointers, don't use stale ip_hdr reference in ip_tunnel code. 17) Backend state transition handling fixes in xen-netback, from Paul Durrant. 18) Packet offset for AH protocol is handled wrong in flow dissector, from Eric Dumazet. 19) Taking down an fq packet scheduler instance can leave stale packets in the queues, fix from Eric Dumazet. 20) Fix performance regressions introduced by TCP Small Queues. From Eric Dumazet. 21) IPV6 GRE tunneling code calculates max_headroom incorrectly, from Hannes Frederic Sowa. 22) Multicast timer handlers in ipv4 and ipv6 can be the last and final reference to the ipv4/ipv6 specific network device state, so use the reference put that will check and release the object if the reference hits zero. From Salam Noureddine. 23) Fix memory corruption in ip_tunnel driver, and use skb_push() instead of __skb_push() so that similar bugs are less hard to find. From Steffen Klassert. 24) Add forgotten hookup of rtnl_ops in SIT and ip6tnl drivers, from Nicolas Dichtel. 25) fq scheduler doesn't accurately rate limit in certain circumstances, from Eric Dumazet. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (103 commits) pkt_sched: fq: rate limiting improvements ip6tnl: allow to use rtnl ops on fb tunnel sit: allow to use rtnl ops on fb tunnel ip_tunnel: Remove double unregister of the fallback device ip_tunnel_core: Change __skb_push back to skb_push ip_tunnel: Add fallback tunnels to the hash lists ip_tunnel: Fix a memory corruption in ip_tunnel_xmit qlcnic: Fix SR-IOV configuration ll_temac: Reset dma descriptors indexes on ndo_open skbuff: size of hole is wrong in a comment ipv6 mcast: use in6_dev_put in timer handlers instead of __in6_dev_put ipv4 igmp: use in_dev_put in timer handlers instead of __in_dev_put ethernet: moxa: fix incorrect placement of __initdata tag ipv6: gre: correct calculation of max_headroom powerpc/83xx: gianfar_ptp: select 1588 clock source through dts file Revert "powerpc/83xx: gianfar_ptp: select 1588 clock source through dts file" bonding: Fix broken promiscuity reference counting issue tcp: TSQ can use a dynamic limit dm9601: fix IFF_ALLMULTI handling pkt_sched: fq: qdisc dismantle fixes ...
Diffstat (limited to 'net')
-rw-r--r--net/802/mrp.c27
-rw-r--r--net/bluetooth/hci_core.c26
-rw-r--r--net/bluetooth/hci_event.c6
-rw-r--r--net/bluetooth/l2cap_core.c7
-rw-r--r--net/bluetooth/rfcomm/tty.c35
-rw-r--r--net/core/dev.c49
-rw-r--r--net/core/flow_dissector.c4
-rw-r--r--net/core/secure_seq.c27
-rw-r--r--net/ipv4/af_inet.c4
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/ip_tunnel.c22
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c10
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/tcp_output.c17
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv6/addrconf.c79
-rw-r--r--net/ipv6/ip6_gre.c4
-rw-r--r--net/ipv6/ip6_output.c53
-rw-r--r--net/ipv6/ip6_tunnel.c3
-rw-r--r--net/ipv6/mcast.c6
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c10
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/sit.c86
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/lapb/lapb_timer.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c86
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c72
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c62
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c6
-rw-r--r--net/netfilter/nf_synproxy_core.c12
-rw-r--r--net/sched/sch_fq.c102
36 files changed, 514 insertions, 354 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 1eb05d80b07b..3ed616215870 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -24,6 +24,11 @@
static unsigned int mrp_join_time __read_mostly = 200;
module_param(mrp_join_time, uint, 0644);
MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
+
+static unsigned int mrp_periodic_time __read_mostly = 1000;
+module_param(mrp_periodic_time, uint, 0644);
+MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)");
+
MODULE_LICENSE("GPL");
static const u8
@@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data)
mrp_join_timer_arm(app);
}
+static void mrp_periodic_timer_arm(struct mrp_applicant *app)
+{
+ mod_timer(&app->periodic_timer,
+ jiffies + msecs_to_jiffies(mrp_periodic_time));
+}
+
+static void mrp_periodic_timer(unsigned long data)
+{
+ struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+ spin_lock(&app->lock);
+ mrp_mad_event(app, MRP_EVENT_PERIODIC);
+ mrp_pdu_queue(app);
+ spin_unlock(&app->lock);
+
+ mrp_periodic_timer_arm(app);
+}
+
static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
{
__be16 endmark;
@@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
mrp_join_timer_arm(app);
+ setup_timer(&app->periodic_timer, mrp_periodic_timer,
+ (unsigned long)app);
+ mrp_periodic_timer_arm(app);
return 0;
err3:
@@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
* all pending messages before the applicant is gone.
*/
del_timer_sync(&app->join_timer);
+ del_timer_sync(&app->periodic_timer);
spin_lock_bh(&app->lock);
mrp_mad_event(app, MRP_EVENT_TX);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 634debab4d54..fb7356fcfe51 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1146,7 +1146,11 @@ int hci_dev_open(__u16 dev)
goto done;
}
- if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) {
+ /* Check for rfkill but allow the HCI setup stage to proceed
+ * (which in itself doesn't cause any RF activity).
+ */
+ if (test_bit(HCI_RFKILLED, &hdev->dev_flags) &&
+ !test_bit(HCI_SETUP, &hdev->dev_flags)) {
ret = -ERFKILL;
goto done;
}
@@ -1566,10 +1570,13 @@ static int hci_rfkill_set_block(void *data, bool blocked)
BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
- if (!blocked)
- return 0;
-
- hci_dev_do_close(hdev);
+ if (blocked) {
+ set_bit(HCI_RFKILLED, &hdev->dev_flags);
+ if (!test_bit(HCI_SETUP, &hdev->dev_flags))
+ hci_dev_do_close(hdev);
+ } else {
+ clear_bit(HCI_RFKILLED, &hdev->dev_flags);
+ }
return 0;
}
@@ -1591,9 +1598,13 @@ static void hci_power_on(struct work_struct *work)
return;
}
- if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+ if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) {
+ clear_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+ hci_dev_do_close(hdev);
+ } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
HCI_AUTO_OFF_TIMEOUT);
+ }
if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags))
mgmt_index_added(hdev);
@@ -2209,6 +2220,9 @@ int hci_register_dev(struct hci_dev *hdev)
}
}
+ if (hdev->rfkill && rfkill_blocked(hdev->rfkill))
+ set_bit(HCI_RFKILLED, &hdev->dev_flags);
+
set_bit(HCI_SETUP, &hdev->dev_flags);
if (hdev->dev_type != HCI_AMP)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 94aab73f89d4..8db3e89fae35 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3557,7 +3557,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
cp.handle = cpu_to_le16(conn->handle);
if (ltk->authenticated)
- conn->sec_level = BT_SECURITY_HIGH;
+ conn->pending_sec_level = BT_SECURITY_HIGH;
+ else
+ conn->pending_sec_level = BT_SECURITY_MEDIUM;
+
+ conn->enc_key_size = ltk->enc_size;
hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index b3bb7bca8e60..63fa11109a1c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3755,6 +3755,13 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
sk = chan->sk;
+ /* For certain devices (ex: HID mouse), support for authentication,
+ * pairing and bonding is optional. For such devices, inorder to avoid
+ * the ACL alive for too long after L2CAP disconnection, reset the ACL
+ * disc_timeout back to HCI_DISCONN_TIMEOUT during L2CAP connect.
+ */
+ conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
+
bacpy(&bt_sk(sk)->src, conn->src);
bacpy(&bt_sk(sk)->dst, conn->dst);
chan->psm = psm;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 6d126faf145f..84fcf9fff3ea 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -569,7 +569,6 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
{
struct rfcomm_dev *dev = dlc->owner;
- struct tty_struct *tty;
if (!dev)
return;
@@ -581,38 +580,8 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
DPM_ORDER_DEV_AFTER_PARENT);
wake_up_interruptible(&dev->port.open_wait);
- } else if (dlc->state == BT_CLOSED) {
- tty = tty_port_tty_get(&dev->port);
- if (!tty) {
- if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
- /* Drop DLC lock here to avoid deadlock
- * 1. rfcomm_dev_get will take rfcomm_dev_lock
- * but in rfcomm_dev_add there's lock order:
- * rfcomm_dev_lock -> dlc lock
- * 2. tty_port_put will deadlock if it's
- * the last reference
- *
- * FIXME: when we release the lock anything
- * could happen to dev, even its destruction
- */
- rfcomm_dlc_unlock(dlc);
- if (rfcomm_dev_get(dev->id) == NULL) {
- rfcomm_dlc_lock(dlc);
- return;
- }
-
- if (!test_and_set_bit(RFCOMM_TTY_RELEASED,
- &dev->flags))
- tty_port_put(&dev->port);
-
- tty_port_put(&dev->port);
- rfcomm_dlc_lock(dlc);
- }
- } else {
- tty_hangup(tty);
- tty_kref_put(tty);
- }
- }
+ } else if (dlc->state == BT_CLOSED)
+ tty_port_tty_hangup(&dev->port, false);
}
static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c713f2239cc..65f829cfd928 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5247,10 +5247,12 @@ static int dev_new_index(struct net *net)
/* Delayed registration/unregisteration */
static LIST_HEAD(net_todo_list);
+static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
{
list_add_tail(&dev->todo_list, &net_todo_list);
+ dev_net(dev)->dev_unreg_count++;
}
static void rollback_registered_many(struct list_head *head)
@@ -5918,6 +5920,12 @@ void netdev_run_todo(void)
if (dev->destructor)
dev->destructor(dev);
+ /* Report a network device has been unregistered */
+ rtnl_lock();
+ dev_net(dev)->dev_unreg_count--;
+ __rtnl_unlock();
+ wake_up(&netdev_unregistering_wq);
+
/* Free network device */
kobject_put(&dev->dev.kobj);
}
@@ -6603,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net)
rtnl_unlock();
}
+static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
+{
+ /* Return with the rtnl_lock held when there are no network
+ * devices unregistering in any network namespace in net_list.
+ */
+ struct net *net;
+ bool unregistering;
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&netdev_unregistering_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ unregistering = false;
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list) {
+ if (net->dev_unreg_count > 0) {
+ unregistering = true;
+ break;
+ }
+ }
+ if (!unregistering)
+ break;
+ __rtnl_unlock();
+ schedule();
+ }
+ finish_wait(&netdev_unregistering_wq, &wait);
+}
+
static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
/* At exit all network devices most be removed from a network
@@ -6614,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
struct net *net;
LIST_HEAD(dev_kill_list);
- rtnl_lock();
+ /* To prevent network device cleanup code from dereferencing
+ * loopback devices or network devices that have been freed
+ * wait here for all pending unregistrations to complete,
+ * before unregistring the loopback device and allowing the
+ * network namespace be freed.
+ *
+ * The netdev todo list containing all network devices
+ * unregistrations that happen in default_device_exit_batch
+ * will run in the rtnl_unlock() at the end of
+ * default_device_exit_batch.
+ */
+ rtnl_lock_unregistering(net_list);
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1929af87b260..8d7d0dd72db2 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -154,8 +154,8 @@ ipv6:
if (poff >= 0) {
__be32 *ports, _ports;
- nhoff += poff;
- ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
+ ports = skb_header_pointer(skb, nhoff + poff,
+ sizeof(_ports), &_ports);
if (ports)
flow->ports = *ports;
}
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6a2f13cee86a..3f1ec1586ae1 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,11 +10,24 @@
#include <net/secure_seq.h>
-static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
-void net_secret_init(void)
+static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+
+static void net_secret_init(void)
{
- get_random_bytes(net_secret, sizeof(net_secret));
+ u32 tmp;
+ int i;
+
+ if (likely(net_secret[0]))
+ return;
+
+ for (i = NET_SECRET_SIZE; i > 0;) {
+ do {
+ get_random_bytes(&tmp, sizeof(tmp));
+ } while (!tmp);
+ cmpxchg(&net_secret[--i], 0, tmp);
+ }
}
#ifdef CONFIG_INET
@@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
u32 hash[MD5_DIGEST_WORDS];
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
u32 hash[MD5_DIGEST_WORDS];
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr)
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force __u32) daddr;
hash[1] = net_secret[13];
hash[2] = net_secret[14];
@@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4])
{
__u32 hash[4];
+ net_secret_init();
memcpy(hash, daddr, 16);
md5_transform(hash, net_secret);
@@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
u32 hash[MD5_DIGEST_WORDS];
u64 seq;
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
u64 seq;
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + daddr[i];
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7a1874b7b8fd..cfeb85cff4f0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,10 +263,8 @@ void build_ehash_secret(void)
get_random_bytes(&rnd, sizeof(rnd));
} while (rnd == 0);
- if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
+ if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
- net_secret_init();
- }
}
EXPORT_SYMBOL(build_ehash_secret);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index dace87f06e5f..7defdc9ba167 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -736,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data)
in_dev->mr_gq_running = 0;
igmpv3_send_report(in_dev, NULL);
- __in_dev_put(in_dev);
+ in_dev_put(in_dev);
}
static void igmp_ifc_timer_expire(unsigned long data)
@@ -749,7 +749,7 @@ static void igmp_ifc_timer_expire(unsigned long data)
igmp_ifc_start_timer(in_dev,
unsolicited_report_interval(in_dev));
}
- __in_dev_put(in_dev);
+ in_dev_put(in_dev);
}
static void igmp_ifc_event(struct in_device *in_dev)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index ac9fabe0300f..63a6d6d6b875 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -623,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel->err_count = 0;
}
+ tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
ttl = tnl_params->ttl;
if (ttl == 0) {
if (skb->protocol == htons(ETH_P_IP))
@@ -641,18 +642,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len;
- if (max_headroom > dev->needed_headroom) {
+ if (max_headroom > dev->needed_headroom)
dev->needed_headroom = max_headroom;
- if (skb_cow_head(skb, dev->needed_headroom)) {
- dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
- return;
- }
+
+ if (skb_cow_head(skb, dev->needed_headroom)) {
+ dev->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return;
}
err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
- ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df,
- !net_eq(tunnel->net, dev_net(dev)));
+ tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return;
@@ -853,8 +853,10 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- if (!IS_ERR(itn->fb_tunnel_dev))
+ if (!IS_ERR(itn->fb_tunnel_dev)) {
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+ }
rtnl_unlock();
return PTR_RET(itn->fb_tunnel_dev);
@@ -884,8 +886,6 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
if (!net_eq(dev_net(t->dev), net))
unregister_netdevice_queue(t->dev, head);
}
- if (itn->fb_tunnel_dev)
- unregister_netdevice_queue(itn->fb_tunnel_dev, head);
}
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index d6c856b17fd4..c31e3ad98ef2 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -61,7 +61,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
/* Push down and install the IP header. */
- __skb_push(skb, sizeof(struct iphdr));
+ skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 67e17dcda65e..b6346bf2fde3 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -267,7 +267,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
if (th == NULL)
return NF_DROP;
- synproxy_parse_options(skb, par->thoff, th, &opts);
+ if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+ return NF_DROP;
if (th->syn && !(th->ack || th->fin || th->rst)) {
/* Initial SYN from client */
@@ -350,7 +351,8 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
/* fall through */
case TCP_CONNTRACK_SYN_SENT:
- synproxy_parse_options(skb, thoff, th, &opts);
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
if (!th->syn && th->ack &&
CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -373,7 +375,9 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
if (!th->syn || !th->ack)
break;
- synproxy_parse_options(skb, thoff, th, &opts);
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->tsoff = opts.tsval - synproxy->its;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bfec521c717f..193db03540ad 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
ipv4_sk_update_pmtu(skb, sk, info);
- else if (type == ICMP_REDIRECT)
+ else if (type == ICMP_REDIRECT) {
ipv4_sk_redirect(skb, sk);
+ return;
+ }
/* Report error on raw socket, if:
1. User requested ip_recverr.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7c83cb8bf137..e6bb8256e59f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -895,8 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_orphan(skb);
skb->sk = sk;
- skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
- tcp_wfree : sock_wfree;
+ skb->destructor = tcp_wfree;
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
/* Build TCP header and checksum it. */
@@ -1840,7 +1839,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
-
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);
@@ -1869,13 +1867,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
}
- /* TSQ : sk_wmem_alloc accounts skb truesize,
- * including skb overhead. But thats OK.
+ /* TCP Small Queues :
+ * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+ * This allows for :
+ * - better RTT estimation and ACK scheduling
+ * - faster recovery
+ * - high rates
*/
- if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+ limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+
+ if (atomic_read(&sk->sk_wmem_alloc) > limit) {
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
break;
}
+
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 74d2c95db57f..0ca44df51ee9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -658,7 +658,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
break;
case ICMP_REDIRECT:
ipv4_sk_redirect(skb, sk);
- break;
+ goto out;
}
/*
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d6ff12617f36..cd3fb301da38 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1499,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
return false;
}
+/* Compares an address/prefix_len with addresses on device @dev.
+ * If one is found it returns true.
+ */
+bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
+ const unsigned int prefix_len, struct net_device *dev)
+{
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+ bool ret = false;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
+ if (ret)
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(ipv6_chk_custom_prefix);
+
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
{
struct inet6_dev *idev;
@@ -2193,43 +2220,21 @@ ok:
else
stored_lft = 0;
if (!update_lft && !create && stored_lft) {
- if (valid_lft > MIN_VALID_LIFETIME ||
- valid_lft > stored_lft)
- update_lft = 1;
- else if (stored_lft <= MIN_VALID_LIFETIME) {
- /* valid_lft <= stored_lft is always true */
- /*
- * RFC 4862 Section 5.5.3e:
- * "Note that the preferred lifetime of
- * the corresponding address is always
- * reset to the Preferred Lifetime in
- * the received Prefix Information
- * option, regardless of whether the
- * valid lifetime is also reset or
- * ignored."
- *
- * So if the preferred lifetime in
- * this advertisement is different
- * than what we have stored, but the
- * valid lifetime is invalid, just
- * reset prefered_lft.
- *
- * We must set the valid lifetime
- * to the stored lifetime since we'll
- * be updating the timestamp below,
- * else we'll set it back to the
- * minimum.
- */
- if (prefered_lft != ifp->prefered_lft) {
- valid_lft = stored_lft;
- update_lft = 1;
- }
- } else {
- valid_lft = MIN_VALID_LIFETIME;
- if (valid_lft < prefered_lft)
- prefered_lft = valid_lft;
- update_lft = 1;
- }
+ const u32 minimum_lft = min(
+ stored_lft, (u32)MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;<