From 6c3118e2305326743acb52250bcfd0d52389d9dc Mon Sep 17 00:00:00 2001
From: Vasanthakumar Thiagarajan <vasanth@atheros.com>
Date: Wed, 23 Jun 2010 06:49:21 -0700
Subject: ath9k: Fix bug in starting ani

There are few places where ANI is started without checking
if it is right to start. This might lead to a case where ani
timer would be left undeleted and cause improper memory acccess
during module unload. This bug is clearly exposed with
paprd support where the driver detects tx hang and does a
chip reset. During this reset ani is (re)started without checking
if it needs to be started. This would leave a timer scheduled
even after all the resources are freed and cause a panic.

This patch introduces a bit in sc_flags to indicate if ani
needs to be started in sw_scan_start() and ath_reset().
This would fix the following panic. This issue is easily seen
with ar9003 + paprd.

 BUG: unable to handle kernel paging request at 0000000000003f38
[<ffffffff81075391>] ? __queue_work+0x41/0x50
[<ffffffff8106afaa>] run_timer_softirq+0x17a/0x370
[<ffffffff81088be8>] ? tick_dev_program_event+0x48/0x110
[<ffffffff81061f69>] __do_softirq+0xb9/0x1f0
[<ffffffff810ba060>] ? handle_IRQ_event+0x50/0x160
[<ffffffff8100af5c>] call_softirq+0x1c/0x30
[<ffffffff8100c9f5>] do_softirq+0x65/0xa0
[<ffffffff81061e25>] irq_exit+0x85/0x90
[<ffffffff8155e095>] do_IRQ+0x75/0xf0
[<ffffffff815570d3>] ret_from_intr+0x0/0x11
<EOI>
[<ffffffff812fd67b>] ? acpi_idle_enter_simple+0xe4/0x119
[<ffffffff812fd674>] ? acpi_idle_enter_simple+0xdd/0x119
[<ffffffff81441c87>] cpuidle_idle_call+0xa7/0x140
[<ffffffff81008da3>] cpu_idle+0xb3/0x110
[<ffffffff81550722>] start_secondary+0x1ee/0x1f5

Signed-off-by: Vasanthakumar Thiagarajan <vasanth@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath9k/ath9k.h |  1 +
 drivers/net/wireless/ath/ath9k/main.c  | 11 ++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h
index fbb7dec6ddeb..5ea87736a6ae 100644
--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h
@@ -445,6 +445,7 @@ void ath_deinit_leds(struct ath_softc *sc);
 #define SC_OP_TSF_RESET              BIT(11)
 #define SC_OP_BT_PRIORITY_DETECTED   BIT(12)
 #define SC_OP_BT_SCAN		     BIT(13)
+#define SC_OP_ANI_RUN		     BIT(14)
 
 /* Powersave flags */
 #define PS_WAIT_FOR_BEACON        BIT(0)
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index abfa0493236f..1e2a68ea9355 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -336,6 +336,10 @@ set_timer:
 static void ath_start_ani(struct ath_common *common)
 {
 	unsigned long timestamp = jiffies_to_msecs(jiffies);
+	struct ath_softc *sc = (struct ath_softc *) common->priv;
+
+	if (!(sc->sc_flags & SC_OP_ANI_RUN))
+		return;
 
 	common->ani.longcal_timer = timestamp;
 	common->ani.shortcal_timer = timestamp;
@@ -872,11 +876,13 @@ static void ath9k_bss_assoc_info(struct ath_softc *sc,
 		/* Reset rssi stats */
 		sc->sc_ah->stats.avgbrssi = ATH_RSSI_DUMMY_MARKER;
 
+		sc->sc_flags |= SC_OP_ANI_RUN;
 		ath_start_ani(common);
 	} else {
 		ath_print(common, ATH_DBG_CONFIG, "Bss Info DISASSOC\n");
 		common->curaid = 0;
 		/* Stop ANI */
+		sc->sc_flags &= ~SC_OP_ANI_RUN;
 		del_timer_sync(&common->ani.timer);
 	}
 }
@@ -1478,8 +1484,10 @@ static int ath9k_add_interface(struct ieee80211_hw *hw,
 
 	if (vif->type == NL80211_IFTYPE_AP    ||
 	    vif->type == NL80211_IFTYPE_ADHOC ||
-	    vif->type == NL80211_IFTYPE_MONITOR)
+	    vif->type == NL80211_IFTYPE_MONITOR) {
+		sc->sc_flags |= SC_OP_ANI_RUN;
 		ath_start_ani(common);
+	}
 
 out:
 	mutex_unlock(&sc->mutex);
@@ -1500,6 +1508,7 @@ static void ath9k_remove_interface(struct ieee80211_hw *hw,
 	mutex_lock(&sc->mutex);
 
 	/* Stop ANI */
+	sc->sc_flags &= ~SC_OP_ANI_RUN;
 	del_timer_sync(&common->ani.timer);
 
 	/* Reclaim beacon resources */
-- 
cgit v1.2.3


From d1e89f37de2845db364ef6d67586cd882f86b557 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2010 03:41:25 -0700
Subject: iwlwifi: fix multicast

commit 3474ad635db371b0d8d0ee40086f15d223d5b6a4
Author: Johannes Berg <johannes.berg@intel.com>
Date:   Thu Apr 29 04:43:05 2010 -0700

    iwlwifi: apply filter flags directly

broke multicast. The reason, it turns out, is that
the code previously checked if ALLMULTI _changed_,
which the new code no longer did, and normally it
_never_ changes. Had somebody changed it manually,
the code prior to my patch there would have been
broken already.

The reason is that we always, unconditionally, ask
the device to pass up all multicast frames, but the
new code made it depend on ALLMULTI which broke it
since now we'd pass up multicast frames depending
on the default filter in the device, which isn't
necessarily what we want (since we don't program it
right now).

Fix this by simply not checking allmulti as we have
allmulti behaviour enabled already anyway.

Reported-by: Maxim Levitsky <maximlevitsky@gmail.com>
Tested-by: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/iwlwifi/iwl-core.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 426e95567de3..5bbc5298ef96 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -1314,7 +1314,6 @@ void iwl_configure_filter(struct ieee80211_hw *hw,
 			changed_flags, *total_flags);
 
 	CHK(FIF_OTHER_BSS | FIF_PROMISC_IN_BSS, RXON_FILTER_PROMISC_MSK);
-	CHK(FIF_ALLMULTI, RXON_FILTER_ACCEPT_GRP_MSK);
 	CHK(FIF_CONTROL, RXON_FILTER_CTL2HOST_MSK);
 	CHK(FIF_BCN_PRBRESP_PROMISC, RXON_FILTER_BCON_AWARE_MSK);
 
@@ -1329,6 +1328,12 @@ void iwl_configure_filter(struct ieee80211_hw *hw,
 
 	mutex_unlock(&priv->mutex);
 
+	/*
+	 * Receiving all multicast frames is always enabled by the
+	 * default flags setup in iwl_connection_init_rx_config()
+	 * since we currently do not support programming multicast
+	 * filters into the device.
+	 */
 	*total_flags &= FIF_OTHER_BSS | FIF_ALLMULTI | FIF_PROMISC_IN_BSS |
 			FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL;
 }
-- 
cgit v1.2.3


From 062bee448bd539580ef9f64efe50fdfe04eeb103 Mon Sep 17 00:00:00 2001
From: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Date: Fri, 18 Jun 2010 11:33:17 -0700
Subject: iwlwifi: set TX_CMD_FLAG_PROT_REQUIRE_MSK in tx_flag

When building tx command, always set TX_CMD_FLAG_PROT_REQUIRE_MSK
for 5000 series and up.

Without setting this bit the firmware will not examine the RTS/CTS setting
and thus not send traffic with the appropriate protection. RTS/CTS is is
required for HT traffic in a noisy environment where, without this setting,
connections will stall on some hardware as documented in the patch that
initially attempted to address this:

    commit 1152dcc28c66a74b5b3f1a3ede0aa6729bfd48e4
    Author: Wey-Yi Guy <wey-yi.w.guy@intel.com>
    Date:   Fri Jan 15 13:42:58 2010 -0800

    iwlwifi: Fix throughput stall issue in HT mode for 5000

    Similar to 6000 and 1000 series, RTS/CTS is the recommended
    protection mechanism for 5000 series in HT mode based on the HW design.
    Using RTS/CTS will better protect the inner exchange from interference,
    especially in highly-congested environment, it also prevent uCode encounter
    TX FIFO underrun and other HT mode related performance issues.

For 3945 and 4965, different flags are used for RTS/CTS or CTS-to-Self
protection.

Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
---
 drivers/net/wireless/iwlwifi/iwl-agn-hcmd.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-hcmd.c b/drivers/net/wireless/iwlwifi/iwl-agn-hcmd.c
index 44ef5d93befc..01658cf82d39 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-hcmd.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-hcmd.c
@@ -212,11 +212,7 @@ static void iwlagn_chain_noise_reset(struct iwl_priv *priv)
 static void iwlagn_rts_tx_cmd_flag(struct ieee80211_tx_info *info,
 			__le32 *tx_flags)
 {
-	if ((info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) ||
-	    (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT))
-		*tx_flags |= TX_CMD_FLG_RTS_CTS_MSK;
-	else
-		*tx_flags &= ~TX_CMD_FLG_RTS_CTS_MSK;
+	*tx_flags |= TX_CMD_FLG_RTS_CTS_MSK;
 }
 
 /* Calc max signal level (dBm) among 3 possible receivers */
-- 
cgit v1.2.3


From d5675bd204efd87a174eeea592de23c4c4e7f908 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 24 Jun 2010 16:59:59 +0300
Subject: vhost: break out of polling loop on error

When ring parsing fails, we currently handle this
as ring empty condition. This means that we enable
kicks and recheck ring empty: if this not empty,
we re-start polling which of course will fail again.

Instead, let's return a negative error code and stop polling.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c   | 12 ++++++++++--
 drivers/vhost/vhost.c | 33 +++++++++++++++++----------------
 drivers/vhost/vhost.h |  8 ++++----
 3 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 0f41c9195e9b..54096eef4840 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -98,7 +98,8 @@ static void tx_poll_start(struct vhost_net *net, struct socket *sock)
 static void handle_tx(struct vhost_net *net)
 {
 	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
-	unsigned head, out, in, s;
+	unsigned out, in, s;
+	int head;
 	struct msghdr msg = {
 		.msg_name = NULL,
 		.msg_namelen = 0,
@@ -135,6 +136,9 @@ static void handle_tx(struct vhost_net *net)
 					 ARRAY_SIZE(vq->iov),
 					 &out, &in,
 					 NULL, NULL);
+		/* On error, stop handling until the next kick. */
+		if (head < 0)
+			break;
 		/* Nothing new?  Wait for eventfd to tell us they refilled. */
 		if (head == vq->num) {
 			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
@@ -192,7 +196,8 @@ static void handle_tx(struct vhost_net *net)
 static void handle_rx(struct vhost_net *net)
 {
 	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
-	unsigned head, out, in, log, s;
+	unsigned out, in, log, s;
+	int head;
 	struct vhost_log *vq_log;
 	struct msghdr msg = {
 		.msg_name = NULL,
@@ -228,6 +233,9 @@ static void handle_rx(struct vhost_net *net)
 					 ARRAY_SIZE(vq->iov),
 					 &out, &in,
 					 vq_log, &log);
+		/* On error, stop handling until the next kick. */
+		if (head < 0)
+			break;
 		/* OK, now we need to know about added descriptors. */
 		if (head == vq->num) {
 			if (unlikely(vhost_enable_notify(vq))) {
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 3b83382e06eb..5ccd384ec0be 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -873,12 +873,13 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
  * number of output then some number of input descriptors, it's actually two
  * iovecs, but we pack them into one and note how many of each there were.
  *
- * This function returns the descriptor number found, or vq->num (which
- * is never a valid descriptor number) if none was found. */
-unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
-			   struct iovec iov[], unsigned int iov_size,
-			   unsigned int *out_num, unsigned int *in_num,
-			   struct vhost_log *log, unsigned int *log_num)
+ * This function returns the descriptor number found, or vq->num (which is
+ * never a valid descriptor number) if none was found.  A negative code is
+ * returned on error. */
+int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+		      struct iovec iov[], unsigned int iov_size,
+		      unsigned int *out_num, unsigned int *in_num,
+		      struct vhost_log *log, unsigned int *log_num)
 {
 	struct vring_desc desc;
 	unsigned int i, head, found = 0;
@@ -890,13 +891,13 @@ unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 	if (get_user(vq->avail_idx, &vq->avail->idx)) {
 		vq_err(vq, "Failed to access avail idx at %p\n",
 		       &vq->avail->idx);
-		return vq->num;
+		return -EFAULT;
 	}
 
 	if ((u16)(vq->avail_idx - last_avail_idx) > vq->num) {
 		vq_err(vq, "Guest moved used index from %u to %u",
 		       last_avail_idx, vq->avail_idx);
-		return vq->num;
+		return -EFAULT;
 	}
 
 	/* If there's nothing new since last we looked, return invalid. */
@@ -912,14 +913,14 @@ unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 		vq_err(vq, "Failed to read head: idx %d address %p\n",
 		       last_avail_idx,
 		       &vq->avail->ring[last_avail_idx % vq->num]);
-		return vq->num;
+		return -EFAULT;
 	}
 
 	/* If their number is silly, that's an error. */
 	if (head >= vq->num) {
 		vq_err(vq, "Guest says index %u > %u is available",
 		       head, vq->num);
-		return vq->num;
+		return -EINVAL;
 	}
 
 	/* When we start there are none of either input nor output. */
@@ -933,19 +934,19 @@ unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 		if (i >= vq->num) {
 			vq_err(vq, "Desc index is %u > %u, head = %u",
 			       i, vq->num, head);
-			return vq->num;
+			return -EINVAL;
 		}
 		if (++found > vq->num) {
 			vq_err(vq, "Loop detected: last one at %u "
 			       "vq size %u head %u\n",
 			       i, vq->num, head);
-			return vq->num;
+			return -EINVAL;
 		}
 		ret = copy_from_user(&desc, vq->desc + i, sizeof desc);
 		if (ret) {
 			vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
 			       i, vq->desc + i);
-			return vq->num;
+			return -EFAULT;
 		}
 		if (desc.flags & VRING_DESC_F_INDIRECT) {
 			ret = get_indirect(dev, vq, iov, iov_size,
@@ -954,7 +955,7 @@ unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			if (ret < 0) {
 				vq_err(vq, "Failure detected "
 				       "in indirect descriptor at idx %d\n", i);
-				return vq->num;
+				return ret;
 			}
 			continue;
 		}
@@ -964,7 +965,7 @@ unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 		if (ret < 0) {
 			vq_err(vq, "Translation failure %d descriptor idx %d\n",
 			       ret, i);
-			return vq->num;
+			return ret;
 		}
 		if (desc.flags & VRING_DESC_F_WRITE) {
 			/* If this is an input descriptor,
@@ -981,7 +982,7 @@ unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			if (*in_num) {
 				vq_err(vq, "Descriptor has out after in: "
 				       "idx %d\n", i);
-				return vq->num;
+				return -EINVAL;
 			}
 			*out_num += ret;
 		}
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 44591ba9b07a..11ee13dba0f7 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -120,10 +120,10 @@ long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg);
 int vhost_vq_access_ok(struct vhost_virtqueue *vq);
 int vhost_log_access_ok(struct vhost_dev *);
 
-unsigned vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *,
-			   struct iovec iov[], unsigned int iov_count,
-			   unsigned int *out_num, unsigned int *in_num,
-			   struct vhost_log *log, unsigned int *log_num);
+int vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *,
+		      struct iovec iov[], unsigned int iov_count,
+		      unsigned int *out_num, unsigned int *in_num,
+		      struct vhost_log *log, unsigned int *log_num);
 void vhost_discard_vq_desc(struct vhost_virtqueue *);
 
 int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
-- 
cgit v1.2.3


From c22d7ac844f1cb9c6a5fd20f89ebadc2feef891b Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <andy@greyhouse.net>
Date: Fri, 25 Jun 2010 09:50:44 +0000
Subject: bonding: prevent netpoll over bonded interfaces

Support for netpoll over bonded interfaces was added here:

	commit f6dc31a85cd46a959bdd987adad14c3b645e03c1
	Author: WANG Cong <amwang@redhat.com>
	Date:   Thu May 6 00:48:51 2010 -0700

	    bonding: make bonding support netpoll

but it is bad enough that we should probably just disable netpoll over
bonding until some of the locking logic in the bonding driver is changed
or converted completely to RCU.  Simple actions like changing the active
slave in active-backup mode will hang the box if a high enough printk
debugging level is enabled.

Keeping the old code around will be good for anyone that wants to work
on it (and for after the RCU conversion), so I propose this small patch
rather than ripping it all out.

Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 5e12462a9d5e..c3d98dde2f86 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -168,7 +168,7 @@ static int arp_ip_count;
 static int bond_mode	= BOND_MODE_ROUNDROBIN;
 static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
 static int lacp_fast;
-
+static int disable_netpoll = 1;
 
 const struct bond_parm_tbl bond_lacp_tbl[] = {
 {	"slow",		AD_LACP_SLOW},
@@ -1742,15 +1742,23 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	bond_set_carrier(bond);
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-	if (slaves_support_netpoll(bond_dev)) {
-		bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
-		if (bond_dev->npinfo)
-			slave_dev->npinfo = bond_dev->npinfo;
-	} else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+	/*
+	 * Netpoll and bonding is broken, make sure it is not initialized
+	 * until it is fixed.
+	 */
+	if (disable_netpoll) {
 		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
-		pr_info("New slave device %s does not support netpoll\n",
-			slave_dev->name);
-		pr_info("Disabling netpoll support for %s\n", bond_dev->name);
+	} else {
+		if (slaves_support_netpoll(bond_dev)) {
+			bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+			if (bond_dev->npinfo)
+				slave_dev->npinfo = bond_dev->npinfo;
+		} else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+			bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
+			pr_info("New slave device %s does not support netpoll\n",
+				slave_dev->name);
+			pr_info("Disabling netpoll support for %s\n", bond_dev->name);
+		}
 	}
 #endif
 	read_unlock(&bond->lock);
@@ -1950,8 +1958,11 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	read_lock_bh(&bond->lock);
-	if (slaves_support_netpoll(bond_dev))
-		bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+
+	 /* Make sure netpoll over stays disabled until fixed. */
+	if (!disable_netpoll)
+		if (slaves_support_netpoll(bond_dev))
+				bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
 	read_unlock_bh(&bond->lock);
 	if (slave_dev->netdev_ops->ndo_netpoll_cleanup)
 		slave_dev->netdev_ops->ndo_netpoll_cleanup(slave_dev);
-- 
cgit v1.2.3


From db048b69037e7fa6a7d9e95a1271a50dc08ae233 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 28 Jun 2010 08:44:07 +0000
Subject: ethtool: Fix potential kernel buffer overflow in ETHTOOL_GRXCLSRLALL

On a 32-bit machine, info.rule_cnt >= 0x40000000 leads to integer
overflow and the buffer may be smaller than needed.  Since
ETHTOOL_GRXCLSRLALL is unprivileged, this can presumably be used for at
least denial of service.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Cc: stable@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a0f4964033d2..a3a7e9a48dff 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -347,8 +347,9 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
 
 	if (info.cmd == ETHTOOL_GRXCLSRLALL) {
 		if (info.rule_cnt > 0) {
-			rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
-					   GFP_USER);
+			if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
+				rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
+						   GFP_USER);
 			if (!rule_buf)
 				return -ENOMEM;
 		}
-- 
cgit v1.2.3


From bf988435bd5b53529f4408a8efb1f433f6ddfda9 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 28 Jun 2010 08:45:58 +0000
Subject: ethtool: Fix potential user buffer overflow for ETHTOOL_{G, S}RXFH

struct ethtool_rxnfc was originally defined in 2.6.27 for the
ETHTOOL_{G,S}RXFH command with only the cmd, flow_type and data
fields.  It was then extended in 2.6.30 to support various additional
commands.  These commands should have been defined to use a new
structure, but it is too late to change that now.

Since user-space may still be using the old structure definition
for the ETHTOOL_{G,S}RXFH commands, and since they do not need the
additional fields, only copy the originally defined fields to and
from user-space.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Cc: stable@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  2 ++
 net/core/ethtool.c      | 36 +++++++++++++++++++++++++++---------
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 276b40a16835..b4207ca3ad52 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -379,6 +379,8 @@ struct ethtool_rxnfc {
 	__u32				flow_type;
 	/* The rx flow hash value or the rule DB size */
 	__u64				data;
+	/* The following fields are not valid and must not be used for
+	 * the ETHTOOL_{G,X}RXFH commands. */
 	struct ethtool_rx_flow_spec	fs;
 	__u32				rule_cnt;
 	__u32				rule_locs[0];
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a3a7e9a48dff..75e4ffeb8cc9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -318,23 +318,33 @@ out:
 }
 
 static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
-						void __user *useraddr)
+						u32 cmd, void __user *useraddr)
 {
-	struct ethtool_rxnfc cmd;
+	struct ethtool_rxnfc info;
+	size_t info_size = sizeof(info);
 
 	if (!dev->ethtool_ops->set_rxnfc)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+	/* struct ethtool_rxnfc was originally defined for
+	 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+	 * members.  User-space might still be using that
+	 * definition. */
+	if (cmd == ETHTOOL_SRXFH)
+		info_size = (offsetof(struct ethtool_rxnfc, data) +
+			     sizeof(info.data));
+
+	if (copy_from_user(&info, useraddr, info_size))
 		return -EFAULT;
 
-	return dev->ethtool_ops->set_rxnfc(dev, &cmd);
+	return dev->ethtool_ops->set_rxnfc(dev, &info);
 }
 
 static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
-						void __user *useraddr)
+						u32 cmd, void __user *useraddr)
 {
 	struct ethtool_rxnfc info;
+	size_t info_size = sizeof(info);
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	int ret;
 	void *rule_buf = NULL;
@@ -342,7 +352,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
 	if (!ops->get_rxnfc)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&info, useraddr, sizeof(info)))
+	/* struct ethtool_rxnfc was originally defined for
+	 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+	 * members.  User-space might still be using that
+	 * definition. */
+	if (cmd == ETHTOOL_GRXFH)
+		info_size = (offsetof(struct ethtool_rxnfc, data) +
+			     sizeof(info.data));
+
+	if (copy_from_user(&info, useraddr, info_size))
 		return -EFAULT;
 
 	if (info.cmd == ETHTOOL_GRXCLSRLALL) {
@@ -360,7 +378,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
 		goto err_out;
 
 	ret = -EFAULT;
-	if (copy_to_user(useraddr, &info, sizeof(info)))
+	if (copy_to_user(useraddr, &info, info_size))
 		goto err_out;
 
 	if (rule_buf) {
@@ -1517,12 +1535,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXCLSRLCNT:
 	case ETHTOOL_GRXCLSRULE:
 	case ETHTOOL_GRXCLSRLALL:
-		rc = ethtool_get_rxnfc(dev, useraddr);
+		rc = ethtool_get_rxnfc(dev, ethcmd, useraddr);
 		break;
 	case ETHTOOL_SRXFH:
 	case ETHTOOL_SRXCLSRLDEL:
 	case ETHTOOL_SRXCLSRLINS:
-		rc = ethtool_set_rxnfc(dev, useraddr);
+		rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
 		break;
 	case ETHTOOL_GGRO:
 		rc = ethtool_get_gro(dev, useraddr);
-- 
cgit v1.2.3


From fa37813401ff52d78591c262d6542e4d5d935584 Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <andy@greyhouse.net>
Date: Tue, 29 Jun 2010 18:28:12 +0000
Subject: ixgbe: fix panic when shutting down system with WoL enabled

This patch added to 2.6.34:

	commit 5f6c01819979afbfec7e0b15fe52371b8eed87e8
	Author: Jesse Brandeburg <jesse.brandeburg@intel.com>
	Date:   Wed Apr 14 16:04:23 2010 -0700

	    ixgbe: fix bug with vlan strip in promsic mode

among other things added a function called ixgbe_vlan_filter_enable.
This new function wants to access and set some rx_ring parameters, but
adapter->rx_ring has already been freed.  This simply moves the free
until after the access and makes __ixgbe_shutdown look more like
ixgbe_remove.

Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
Acked-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index ce30c62a97f7..e237748995c1 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -5195,7 +5195,6 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake)
 		ixgbe_free_all_tx_resources(adapter);
 		ixgbe_free_all_rx_resources(adapter);
 	}
-	ixgbe_clear_interrupt_scheme(adapter);
 
 #ifdef CONFIG_PM
 	retval = pci_save_state(pdev);
@@ -5230,6 +5229,8 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake)
 
 	*enable_wake = !!wufc;
 
+	ixgbe_clear_interrupt_scheme(adapter);
+
 	ixgbe_release_hw_control(adapter);
 
 	pci_disable_device(pdev);
-- 
cgit v1.2.3


From 9f756f018a6d9f83556f972ce7fcd6870274efae Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 29 Jun 2010 18:28:36 +0000
Subject: ixgbe: disable tx engine before disabling tx laser

Disabling the tx laser while receiving DMA requests
can hang the device.  After this occurs the device
is in a bad state. The GPIO bit never clears when
PCI master access is disabled and a reboot is required
to get the device in a good state again.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index e237748995c1..7ddd60e7d389 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3684,10 +3684,6 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 	/* signal that we are down to the interrupt handler */
 	set_bit(__IXGBE_DOWN, &adapter->state);
 
-	/* power down the optics */
-	if (hw->phy.multispeed_fiber)
-		hw->mac.ops.disable_tx_laser(hw);
-
 	/* disable receive for all VFs and wait one second */
 	if (adapter->num_vfs) {
 		/* ping all the active vfs to let them know we are going down */
@@ -3742,6 +3738,10 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 		                (IXGBE_READ_REG(hw, IXGBE_DMATXCTL) &
 		                 ~IXGBE_DMATXCTL_TE));
 
+	/* power down the optics */
+	if (hw->phy.multispeed_fiber)
+		hw->mac.ops.disable_tx_laser(hw);
+
 	/* clear n-tuple filters that are cached */
 	ethtool_ntuple_flush(netdev);
 
-- 
cgit v1.2.3


From d3ead2413cb99d3e6265577b12537434e229d8c2 Mon Sep 17 00:00:00 2001
From: Guillaume Gaudonville <guillaume.gaudonville@6wind.com>
Date: Tue, 29 Jun 2010 18:29:00 +0000
Subject: ixgbe: skip non IPv4 packets in ATR filter

In driver ixgbe, ixgbe_atr may cause crashes for non-ipv4 packets. Just
add a test to check skb->protocol.  It may crash on short packets due
to ip_hdr() access.

Signed-off-by: Guillaume Gaudonville <guillaume.gaudonville@6wind.com>
Acked-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 7ddd60e7d389..a0b33165b989 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -6024,7 +6024,6 @@ static void ixgbe_tx_queue(struct ixgbe_adapter *adapter,
 static void ixgbe_atr(struct ixgbe_adapter *adapter, struct sk_buff *skb,
 	              int queue, u32 tx_flags)
 {
-	/* Right now, we support IPv4 only */
 	struct ixgbe_atr_input atr_input;
 	struct tcphdr *th;
 	struct iphdr *iph = ip_hdr(skb);
@@ -6033,6 +6032,9 @@ static void ixgbe_atr(struct ixgbe_adapter *adapter, struct sk_buff *skb,
 	u32 src_ipv4_addr, dst_ipv4_addr;
 	u8 l4type = 0;
 
+	/* Right now, we support IPv4 only */
+	if (skb->protocol != htons(ETH_P_IP))
+		return;
 	/* check if we're UDP or TCP */
 	if (iph->protocol == IPPROTO_TCP) {
 		th = tcp_hdr(skb);
-- 
cgit v1.2.3


From 4efd7e833591721bec21cc4730a7f6261417840f Mon Sep 17 00:00:00 2001
From: Andreas Steffen <andreas.steffen@strongswan.org>
Date: Wed, 30 Jun 2010 10:41:15 -0700
Subject: xfrm: fix XFRMA_MARK extraction in xfrm_mark_get

Determine the size of the xfrm_mark struct, not of its pointer.

Signed-off-by: Andreas Steffen <andreas.steffen@strongswan.org>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1913af67c43d..fc8f36dd0f5c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1586,7 +1586,7 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
 static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m)
 {
 	if (attrs[XFRMA_MARK])
-		memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(m));
+		memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(struct xfrm_mark));
 	else
 		m->v = m->m = 0;
 
-- 
cgit v1.2.3


From 9b2c2ff7a1c04e69842254dd4afe0f8ad4efa439 Mon Sep 17 00:00:00 2001
From: Saeed Bishara <saeed@marvell.com>
Date: Sun, 27 Jun 2010 00:26:43 +0000
Subject: mv643xx_eth: use sw csum for big packets

Some controllers (KW, Dove) limits the TX IP/layer4 checksum offloading to a max size.

Signed-off-by: Saeed Bishara <saeed@marvell.com>
Acked-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mv643xx_eth.c   | 9 +++++++--
 include/linux/mv643xx_eth.h | 5 +++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index e345ec8cb473..73bb8ea6f54a 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -289,6 +289,7 @@ struct mv643xx_eth_shared_private {
 	unsigned int t_clk;
 	int extended_rx_coal_limit;
 	int tx_bw_control;
+	int tx_csum_limit;
 };
 
 #define TX_BW_CONTROL_ABSENT		0
@@ -776,13 +777,16 @@ static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb)
 	l4i_chk = 0;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		int hdr_len;
 		int tag_bytes;
 
 		BUG_ON(skb->protocol != htons(ETH_P_IP) &&
 		       skb->protocol != htons(ETH_P_8021Q));
 
-		tag_bytes = (void *)ip_hdr(skb) - (void *)skb->data - ETH_HLEN;
-		if (unlikely(tag_bytes & ~12)) {
+		hdr_len = (void *)ip_hdr(skb) - (void *)skb->data;
+		tag_bytes = hdr_len - ETH_HLEN;
+		if (skb->len - hdr_len > mp->shared->tx_csum_limit ||
+		    unlikely(tag_bytes & ~12)) {
 			if (skb_checksum_help(skb) == 0)
 				goto no_csum;
 			kfree_skb(skb);
@@ -2666,6 +2670,7 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev)
 	 * Detect hardware parameters.
 	 */
 	msp->t_clk = (pd != NULL && pd->t_clk != 0) ? pd->t_clk : 133000000;
+	msp->tx_csum_limit = pd->tx_csum_limit ? pd->tx_csum_limit : 9 * 1024;
 	infer_hw_params(msp);
 
 	platform_set_drvdata(pdev, msp);
diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h
index cbbbe9bfecad..30b0c4e78f91 100644
--- a/include/linux/mv643xx_eth.h
+++ b/include/linux/mv643xx_eth.h
@@ -19,6 +19,11 @@ struct mv643xx_eth_shared_platform_data {
 	struct mbus_dram_target_info	*dram;
 	struct platform_device	*shared_smi;
 	unsigned int		t_clk;
+	/*
+	 * Max packet size for Tx IP/Layer 4 checksum, when set to 0, default
+	 * limit of 9KiB will be used.
+	 */
+	int			tx_csum_limit;
 };
 
 #define MV643XX_ETH_PHY_ADDR_DEFAULT	0
-- 
cgit v1.2.3


From dd1589a431e90f9ff587e640c67101a565e52bba Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 30 Jun 2010 13:10:09 -0700
Subject: Bluetooth: Fix abuse of the preincrement operator

Fix abuse of the preincrement operator as detected when building with gcc
4.6.0:

	 CC [M]  drivers/bluetooth/hci_bcsp.o
	drivers/bluetooth/hci_bcsp.c: In function 'bcsp_prepare_pkt':
	drivers/bluetooth/hci_bcsp.c:247:20: warning: operation on 'bcsp->msgq_txseq' may be undefined

Reported-by: Justin P. Mattock <justinmattock@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/bluetooth/hci_bcsp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c
index 40aec0fb8596..42d69d4de05c 100644
--- a/drivers/bluetooth/hci_bcsp.c
+++ b/drivers/bluetooth/hci_bcsp.c
@@ -244,7 +244,7 @@ static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data,
 	if (rel) {
 		hdr[0] |= 0x80 + bcsp->msgq_txseq;
 		BT_DBG("Sending packet with seqno %u", bcsp->msgq_txseq);
-		bcsp->msgq_txseq = ++(bcsp->msgq_txseq) & 0x07;
+		bcsp->msgq_txseq = (bcsp->msgq_txseq + 1) & 0x07;
 	}
 
 	if (bcsp->use_crc)
-- 
cgit v1.2.3


From 42d782ac1bef7cbcdf05b857731345c6e8149f90 Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fleitner@redhat.com>
Date: Tue, 29 Jun 2010 08:24:39 +0000
Subject: bonding: check if clients MAC addr has changed

When two systems using bonding devices in adaptive load
balancing (ALB) communicates with each other, an endless
ping-pong of ARP replies starts between these two systems.

What happens? In the ALB mode, bonding driver keeps track
of each client connected in a hash table, so it can do the
receive load balancing (RLB). This hash table is updated
when an ARP reply is received, then it scans for the client
entry, updates its MAC address and flag it to be announced
later. Therefore, two seconds later, the alb monitor runs
and send for each updated client entry two ARP replies
updating this specific client. The same process happens on
the receiving system, causing the endless ping-pong of arp
replies.

See more information including the relevant functions below:

   System 1                          System 2
    bond0                             bond0

   ping <system2>
    ARP request  --------->
                           <--------- ARP reply

+->rlb_arp_recv  <---------------------+   <--- loop begins
|  rlb_update_entry_from_arp           |
|  client_info->ntt = 1;               |
|  bond_info->rx_ntt = 1;              |
|                                      |
|         <communication succeed>      |
|                                      |
|  bond_alb_monitor                    |
|  rlb_update_rx_clients               |
|  rlb_update_client                   |
|  arp_create(ARPOP_REPLY)             |
|   send ARP reply -------------->     V
|   send ARP reply -------------->
|                               rlb_arp_recv
|                               rlb_update_entry_from_arp
|                               client_info->ntt = 1;
|                               bond_info->rx_ntt = 1;
|                           < snipped, same as in system 1>
+-------           <-------------- send ARP reply
                   <-------------- send ARP reply

Besides the unneeded networking traffic, this loop breaks
a cluster because a backup system can't take over the IP
address. There is always one system sending an ARP reply
poisoning the network.

This patch fixes the problem adding a check for the MAC
address before updating it. Thus, if the MAC address didn't
change, there is no need to update neither to announce it later.

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 40fdc41446cc..df483076eda6 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -340,7 +340,8 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
 
 	if ((client_info->assigned) &&
 	    (client_info->ip_src == arp->ip_dst) &&
-	    (client_info->ip_dst == arp->ip_src)) {
+	    (client_info->ip_dst == arp->ip_src) &&
+	    (compare_ether_addr_64bits(client_info->mac_dst, arp->mac_src))) {
 		/* update the clients MAC address */
 		memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN);
 		client_info->ntt = 1;
-- 
cgit v1.2.3


From 7b3384fc30633738ae4eaf8e1bc6ce70470ced80 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 1 Jul 2010 18:40:12 +0300
Subject: vhost: add unlikely annotations to error path

patch 'break out of polling loop on error' caused
a minor performance regression on my machine: recover
that performance by adding a bunch of unlikely annotations
in the error handling.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c   |  4 ++--
 drivers/vhost/vhost.c | 53 ++++++++++++++++++++++++++-------------------------
 2 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 54096eef4840..2406377a6e5e 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -137,7 +137,7 @@ static void handle_tx(struct vhost_net *net)
 					 &out, &in,
 					 NULL, NULL);
 		/* On error, stop handling until the next kick. */
-		if (head < 0)
+		if (unlikely(head < 0))
 			break;
 		/* Nothing new?  Wait for eventfd to tell us they refilled. */
 		if (head == vq->num) {
@@ -234,7 +234,7 @@ static void handle_rx(struct vhost_net *net)
 					 &out, &in,
 					 vq_log, &log);
 		/* On error, stop handling until the next kick. */
-		if (head < 0)
+		if (unlikely(head < 0))
 			break;
 		/* OK, now we need to know about added descriptors. */
 		if (head == vq->num) {
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 5ccd384ec0be..0b99783083f6 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -736,12 +736,12 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
 	mem = rcu_dereference(dev->memory);
 	while ((u64)len > s) {
 		u64 size;
-		if (ret >= iov_size) {
+		if (unlikely(ret >= iov_size)) {
 			ret = -ENOBUFS;
 			break;
 		}
 		reg = find_region(mem, addr, len);
-		if (!reg) {
+		if (unlikely(!reg)) {
 			ret = -EFAULT;
 			break;
 		}
@@ -780,18 +780,18 @@ static unsigned next_desc(struct vring_desc *desc)
 	return next;
 }
 
-static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
-			     struct iovec iov[], unsigned int iov_size,
-			     unsigned int *out_num, unsigned int *in_num,
-			     struct vhost_log *log, unsigned int *log_num,
-			     struct vring_desc *indirect)
+static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+			struct iovec iov[], unsigned int iov_size,
+			unsigned int *out_num, unsigned int *in_num,
+			struct vhost_log *log, unsigned int *log_num,
+			struct vring_desc *indirect)
 {
 	struct vring_desc desc;
 	unsigned int i = 0, count, found = 0;
 	int ret;
 
 	/* Sanity check */
-	if (indirect->len % sizeof desc) {
+	if (unlikely(indirect->len % sizeof desc)) {
 		vq_err(vq, "Invalid length in indirect descriptor: "
 		       "len 0x%llx not multiple of 0x%zx\n",
 		       (unsigned long long)indirect->len,
@@ -801,7 +801,7 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 
 	ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect,
 			     ARRAY_SIZE(vq->indirect));
-	if (ret < 0) {
+	if (unlikely(ret < 0)) {
 		vq_err(vq, "Translation failure %d in indirect.\n", ret);
 		return ret;
 	}
@@ -813,7 +813,7 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 	count = indirect->len / sizeof desc;
 	/* Buffers are chained via a 16 bit next field, so
 	 * we can have at most 2^16 of these. */
-	if (count > USHRT_MAX + 1) {
+	if (unlikely(count > USHRT_MAX + 1)) {
 		vq_err(vq, "Indirect buffer length too big: %d\n",
 		       indirect->len);
 		return -E2BIG;
@@ -821,19 +821,19 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 
 	do {
 		unsigned iov_count = *in_num + *out_num;
-		if (++found > count) {
+		if (unlikely(++found > count)) {
 			vq_err(vq, "Loop detected: last one at %u "
 			       "indirect size %u\n",
 			       i, count);
 			return -EINVAL;
 		}
-		if (memcpy_fromiovec((unsigned char *)&desc, vq->indirect,
-				     sizeof desc)) {
+		if (unlikely(memcpy_fromiovec((unsigned char *)&desc, vq->indirect,
+					      sizeof desc))) {
 			vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
 			       i, (size_t)indirect->addr + i * sizeof desc);
 			return -EINVAL;
 		}
-		if (desc.flags & VRING_DESC_F_INDIRECT) {
+		if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
 			vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n",
 			       i, (size_t)indirect->addr + i * sizeof desc);
 			return -EINVAL;
@@ -841,7 +841,7 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 
 		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
 				     iov_size - iov_count);
-		if (ret < 0) {
+		if (unlikely(ret < 0)) {
 			vq_err(vq, "Translation failure %d indirect idx %d\n",
 			       ret, i);
 			return ret;
@@ -857,7 +857,7 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 		} else {
 			/* If it's an output descriptor, they're all supposed
 			 * to come before any input descriptors. */
-			if (*in_num) {
+			if (unlikely(*in_num)) {
 				vq_err(vq, "Indirect descriptor "
 				       "has out after in: idx %d\n", i);
 				return -EINVAL;
@@ -888,13 +888,13 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 
 	/* Check it isn't doing very strange things with descriptor numbers. */
 	last_avail_idx = vq->last_avail_idx;
-	if (get_user(vq->avail_idx, &vq->avail->idx)) {
+	if (unlikely(get_user(vq->avail_idx, &vq->avail->idx))) {
 		vq_err(vq, "Failed to access avail idx at %p\n",
 		       &vq->avail->idx);
 		return -EFAULT;
 	}
 
-	if ((u16)(vq->avail_idx - last_avail_idx) > vq->num) {
+	if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
 		vq_err(vq, "Guest moved used index from %u to %u",
 		       last_avail_idx, vq->avail_idx);
 		return -EFAULT;
@@ -909,7 +909,8 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 
 	/* Grab the next descriptor number they're advertising, and increment
 	 * the index we've seen. */
-	if (get_user(head, &vq->avail->ring[last_avail_idx % vq->num])) {
+	if (unlikely(get_user(head,
+			      &vq->avail->ring[last_avail_idx % vq->num]))) {
 		vq_err(vq, "Failed to read head: idx %d address %p\n",
 		       last_avail_idx,
 		       &vq->avail->ring[last_avail_idx % vq->num]);
@@ -917,7 +918,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 	}
 
 	/* If their number is silly, that's an error. */
-	if (head >= vq->num) {
+	if (unlikely(head >= vq->num)) {
 		vq_err(vq, "Guest says index %u > %u is available",
 		       head, vq->num);
 		return -EINVAL;
@@ -931,19 +932,19 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 	i = head;
 	do {
 		unsigned iov_count = *in_num + *out_num;
-		if (i >= vq->num) {
+		if (unlikely(i >= vq->num)) {
 			vq_err(vq, "Desc index is %u > %u, head = %u",
 			       i, vq->num, head);
 			return -EINVAL;
 		}
-		if (++found > vq->num) {
+		if (unlikely(++found > vq->num)) {
 			vq_err(vq, "Loop detected: last one at %u "
 			       "vq size %u head %u\n",
 			       i, vq->num, head);
 			return -EINVAL;
 		}
 		ret = copy_from_user(&desc, vq->desc + i, sizeof desc);
-		if (ret) {
+		if (unlikely(ret)) {
 			vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
 			       i, vq->desc + i);
 			return -EFAULT;
@@ -952,7 +953,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			ret = get_indirect(dev, vq, iov, iov_size,
 					   out_num, in_num,
 					   log, log_num, &desc);
-			if (ret < 0) {
+			if (unlikely(ret < 0)) {
 				vq_err(vq, "Failure detected "
 				       "in indirect descriptor at idx %d\n", i);
 				return ret;
@@ -962,7 +963,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 
 		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
 				     iov_size - iov_count);
-		if (ret < 0) {
+		if (unlikely(ret < 0)) {
 			vq_err(vq, "Translation failure %d descriptor idx %d\n",
 			       ret, i);
 			return ret;
@@ -979,7 +980,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 		} else {
 			/* If it's an output descriptor, they're all supposed
 			 * to come before any input descriptors. */
-			if (*in_num) {
+			if (unlikely(*in_num)) {
 				vq_err(vq, "Descriptor has out after in: "
 				       "idx %d\n", i);
 				return -EINVAL;
-- 
cgit v1.2.3


From c89827e0e9346c039aed9b63c14096c2d36796b1 Mon Sep 17 00:00:00 2001
From: Cody Rester <codyrester@gmail.com>
Date: Thu, 1 Jul 2010 21:27:44 -0700
Subject: drivers: bluetooth: bluecard_cs.c: Fixed include error, changed to
 linux/io.h

Fixed include error, changed to linux/io.h

Signed-off-by: Cody Rester <codyrester@gmail.com>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/bluetooth/bluecard_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index 6f907ebed2d5..6d34f405a2f3 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -37,7 +37,7 @@
 #include <linux/wait.h>
 
 #include <linux/skbuff.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 #include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
-- 
cgit v1.2.3


From 499031ac8a3df6738f6186ded9da853e8ea18253 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 2 Jul 2010 10:05:01 +0200
Subject: netfilter: ip6t_REJECT: fix a dst leak in ipv6 REJECT

We should release dst if dst->error is set.

Bug introduced in 2.6.14 by commit e104411b82f5c
([XFRM]: Always release dst_entry on error in xfrm_lookup)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/ip6t_REJECT.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 47d227713758..2933396e0281 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -97,9 +97,11 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 	fl.fl_ip_dport = otcph.source;
 	security_skb_classify_flow(oldskb, &fl);
 	dst = ip6_route_output(net, NULL, &fl);
-	if (dst == NULL)
+	if (dst == NULL || dst->error) {
+		dst_release(dst);
 		return;
-	if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0))
+	}
+	if (xfrm_lookup(net, &dst, &fl, NULL, 0))
 		return;
 
 	hh_len = (dst->dev->hard_header_len + 15)&~15;
-- 
cgit v1.2.3


From 7b00ac51ffcda994ef0839001257be894cc6e5a8 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Fri, 2 Jul 2010 21:47:54 -0700
Subject: net: Revert "rndis_host: Poll status channel before control channel"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit c17b274dc2aa538b68c1f02b01a3c4e124b435ba.

That change was reported to break rndis_wlan support for the WUSB54GS.

Reported-by: Luís Picciochi Oliveira <pitxyoki@gmail.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/rndis_host.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c
index 28d3ee175e7b..dd8a4adf48ca 100644
--- a/drivers/net/usb/rndis_host.c
+++ b/drivers/net/usb/rndis_host.c
@@ -104,10 +104,8 @@ static void rndis_msg_indicate(struct usbnet *dev, struct rndis_indicate *msg,
 int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen)
 {
 	struct cdc_state	*info = (void *) &dev->data;
-	struct usb_cdc_notification notification;
 	int			master_ifnum;
 	int			retval;
-	int			partial;
 	unsigned		count;
 	__le32			rsp;
 	u32			xid = 0, msg_len, request_id;
@@ -135,17 +133,13 @@ int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen)
 	if (unlikely(retval < 0 || xid == 0))
 		return retval;
 
-	/* Some devices don't respond on the control channel until
-	 * polled on the status channel, so do that first. */
-	retval = usb_interrupt_msg(
-		dev->udev,
-		usb_rcvintpipe(dev->udev, dev->status->desc.bEndpointAddress),
-		&notification, sizeof(notification), &partial,
-		RNDIS_CONTROL_TIMEOUT_MS);
-	if (unlikely(retval < 0))
-		return retval;
+	// FIXME Seems like some devices discard responses when
+	// we time out and cancel our "get response" requests...
+	// so, this is fragile.  Probably need to poll for status.
 
-	/* Poll the control channel; the request probably completed immediately */
+	/* ignore status endpoint, just poll the control channel;
+	 * the request probably completed immediately
+	 */
 	rsp = buf->msg_type | RNDIS_MSG_COMPLETION;
 	for (count = 0; count < 10; count++) {
 		memset(buf, 0, CONTROL_BUFFER_SIZE);
-- 
cgit v1.2.3


From 0dacca73a3ddefa6cb8a7e0282f938e01faa1a64 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Fri, 2 Jul 2010 21:49:02 -0700
Subject: usbnet: Set parent device early for netdev_printk()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

netdev_printk() follows the net_device's parent device pointer, so
we must set that earlier than we previously did.

Reported-by: Luís Picciochi Oliveira <pitxyoki@gmail.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index a95c73de5824..81c76ada8e56 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1293,6 +1293,9 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 		goto out;
 	}
 
+	/* netdev_printk() needs this so do it as early as possible */
+	SET_NETDEV_DEV(net, &udev->dev);
+
 	dev = netdev_priv(net);
 	dev->udev = xdev;
 	dev->intf = udev;
@@ -1377,8 +1380,6 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 		dev->rx_urb_size = dev->hard_mtu;
 	dev->maxpacket = usb_maxpacket (dev->udev, dev->out, 1);
 
-	SET_NETDEV_DEV(net, &udev->dev);
-
 	if ((dev->driver_info->flags & FLAG_WLAN) != 0)
 		SET_NETDEV_DEVTYPE(net, &wlan_type);
 	if ((dev->driver_info->flags & FLAG_WWAN) != 0)
-- 
cgit v1.2.3


From 72046d84f0d6e3047f4d5a5173260141983b2b61 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@linux.vnet.ibm.com>
Date: Thu, 1 Jul 2010 03:00:17 +0000
Subject: qlge: Replacing add_timer() to mod_timer()

Currently qlge driver calls add_timer() instead of mod_timer().
This patch changes add_timer() to mod_timer(), which seems a better
solution.

Signed-off-by: Breno Leitao <leitao@linux.vnet.ibm.com>
Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/qlge/qlge_main.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index fa4b24c49f42..509dadcd1c5a 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -4611,8 +4611,7 @@ static void ql_timer(unsigned long data)
 		return;
 	}
 
-	qdev->timer.expires = jiffies + (5*HZ);
-	add_timer(&qdev->timer);
+	mod_timer(&qdev->timer, jiffies + (5*HZ));
 }
 
 static int __devinit qlge_probe(struct pci_dev *pdev,
@@ -4808,8 +4807,7 @@ static void qlge_io_resume(struct pci_dev *pdev)
 		netif_err(qdev, ifup, qdev->ndev,
 			  "Device was not running prior to EEH.\n");
 	}
-	qdev->timer.expires = jiffies + (5*HZ);
-	add_timer(&qdev->timer);
+	mod_timer(&qdev->timer, jiffies + (5*HZ));
 	netif_device_attach(ndev);
 }
 
@@ -4871,8 +4869,7 @@ static int qlge_resume(struct pci_dev *pdev)
 			return err;
 	}
 
-	qdev->timer.expires = jiffies + (5*HZ);
-	add_timer(&qdev->timer);
+	mod_timer(&qdev->timer, jiffies + (5*HZ));
 	netif_device_attach(ndev);
 
 	return 0;
-- 
cgit v1.2.3


From 7ae80abdba0644e12ac17da567a2db1efc1bf8a8 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@linux.vnet.ibm.com>
Date: Thu, 1 Jul 2010 03:00:18 +0000
Subject: qlge: fix a eeh handler to not add a pending timer

On some ocasions the function qlge_io_resume() tries to add a
pending timer, which causes the system to hit the BUG() on
add_timer() function.

This patch removes the timer during the EEH recovery.

Signed-off-by: Breno Leitao <leitao@linux.vnet.ibm.com>
Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/qlge/qlge_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 509dadcd1c5a..d10bcefc0e45 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -4712,6 +4712,8 @@ static void ql_eeh_close(struct net_device *ndev)
 		netif_stop_queue(ndev);
 	}
 
+	/* Disabling the timer */
+	del_timer_sync(&qdev->timer);
 	if (test_bit(QL_ADAPTER_UP, &qdev->flags))
 		cancel_delayed_work_sync(&qdev->asic_reset_work);
 	cancel_delayed_work_sync(&qdev->mpi_reset_work);
-- 
cgit v1.2.3


From 4ef6acff83222f4496ceef7d1f0ee9e50a5bb403 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Thu, 1 Jul 2010 13:21:35 +0000
Subject: sched: qdisc_reset_all_tx is calling qdisc_reset without qdisc_lock

When calling qdisc_reset() the qdisc lock needs to be held.  In
this case there is at least one driver i4l which is using this
without holding the lock.  Add the locking here.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 03ca5d826757..ba749be1e354 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -317,8 +317,16 @@ extern void tcf_destroy_chain(struct tcf_proto **fl);
 static inline void qdisc_reset_all_tx(struct net_device *dev)
 {
 	unsigned int i;
-	for (i = 0; i < dev->num_tx_queues; i++)
-		qdisc_reset(netdev_get_tx_queue(dev, i)->qdisc);
+	struct Qdisc *qdisc;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+		if (qdisc) {
+			spin_lock_bh(qdisc_lock(qdisc));
+			qdisc_reset(qdisc);
+			spin_unlock_bh(qdisc_lock(qdisc));
+		}
+	}
 }
 
 /* Are all TX queues of the device empty?  */
-- 
cgit v1.2.3


From f0796d5c73e59786d09a1e617689d1d415f2db44 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Thu, 1 Jul 2010 13:21:57 +0000
Subject: net: decreasing real_num_tx_queues needs to flush qdisc

Reducing real_num_queues needs to flush the qdisc otherwise
skbs with queue_mappings greater then real_num_tx_queues can
be sent to the underlying driver.

The flow for this is,

dev_queue_xmit()
	dev_pick_tx()
		skb_tx_hash()  => hash using real_num_tx_queues
		skb_set_queue_mapping()
	...
	qdisc_enqueue_root() => enqueue skb on txq from hash
...
dev->real_num_tx_queues -= n
...
sch_direct_xmit()
	dev_hard_start_xmit()
		ndo_start_xmit(skb,dev) => skb queue set with old hash

skbs are enqueued on the qdisc with skb->queue_mapping set
0 < queue_mappings < real_num_tx_queues.  When the driver
decreases real_num_tx_queues skb's may be dequeued from the
qdisc with a queue_mapping greater then real_num_tx_queues.

This fixes a case in ixgbe where this was occurring with DCB
and FCoE. Because the driver is using queue_mapping to map
skbs to tx descriptor rings we can potentially map skbs to
rings that no longer exist.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_main.c |  2 +-
 include/linux/netdevice.h      |  3 +++
 include/net/sch_generic.h      | 12 ++++++++----
 net/core/dev.c                 | 18 ++++++++++++++++++
 4 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index a0b33165b989..7b5d9764f317 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -4001,7 +4001,7 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
 
 done:
 	/* Notify the stack of the (possibly) reduced Tx Queue count. */
-	adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
+	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
 }
 
 static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 40291f375024..5e6188d9f017 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1656,6 +1656,9 @@ static inline int netif_is_multiqueue(const struct net_device *dev)
 	return (dev->num_tx_queues > 1);
 }
 
+extern void netif_set_real_num_tx_queues(struct net_device *dev,
+					 unsigned int txq);
+
 /* Use this variant when it is known for sure that it
  * is executing from hardware interrupt context or with hardware interrupts
  * disabled.
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ba749be1e354..433604bb3fe8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -313,13 +313,12 @@ extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
 extern void tcf_destroy(struct tcf_proto *tp);
 extern void tcf_destroy_chain(struct tcf_proto **fl);
 
-/* Reset all TX qdiscs of a device.  */
-static inline void qdisc_reset_all_tx(struct net_device *dev)
+/* Reset all TX qdiscs greater then index of a device.  */
+static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 {
-	unsigned int i;
 	struct Qdisc *qdisc;
 
-	for (i = 0; i < dev->num_tx_queues; i++) {
+	for (; i < dev->num_tx_queues; i++) {
 		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
 		if (qdisc) {
 			spin_lock_bh(qdisc_lock(qdisc));
@@ -329,6 +328,11 @@ static inline void qdisc_reset_all_tx(struct net_device *dev)
 	}
 }
 
+static inline void qdisc_reset_all_tx(struct net_device *dev)
+{
+	qdisc_reset_all_tx_gt(dev, 0);
+}
+
 /* Are all TX queues of the device empty?  */
 static inline bool qdisc_all_tx_empty(const struct net_device *dev)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b3bf53bc687..723a34710ad4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1553,6 +1553,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+/*
+ * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
+ * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ */
+void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+{
+	unsigned int real_num = dev->real_num_tx_queues;
+
+	if (unlikely(txq > dev->num_tx_queues))
+		;
+	else if (txq > real_num)
+		dev->real_num_tx_queues = txq;
+	else if (txq < real_num) {
+		dev->real_num_tx_queues = txq;
+		qdisc_reset_all_tx_gt(dev, txq);
+	}
+}
+EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
 static inline void __netif_reschedule(struct Qdisc *q)
 {
-- 
cgit v1.2.3


From e2aec372ff4b7e78e79c308104a860ae0ed20950 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 1 Jul 2010 13:18:58 +0000
Subject: linux/net.h: fix kernel-doc warnings

Fix kernel-doc warnings in linux/net.h:

Warning(include/linux/net.h:151): No description found for parameter 'wq'
Warning(include/linux/net.h:151): Excess struct/union/enum/typedef member 'fasync_list' description in 'socket'
Warning(include/linux/net.h:151): Excess struct/union/enum/typedef member 'wait' description in 'socket'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 2b4deeeb8646..dee0b11a8759 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -129,10 +129,9 @@ struct socket_wq {
  *  @type: socket type (%SOCK_STREAM, etc)
  *  @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc)
  *  @ops: protocol specific socket operations
- *  @fasync_list: Asynchronous wake up list
  *  @file: File back pointer for gc
  *  @sk: internal networking protocol agnostic socket representation
- *  @wait: wait queue for several uses
+ *  @wq: wait queue for several uses
  */
 struct socket {
 	socket_state		state;
-- 
cgit v1.2.3


From 4a49043223e5047c8f60a09f7b2927a2e6e8dfc7 Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@exar.com>
Date: Fri, 2 Jul 2010 09:13:49 +0000
Subject: s2io: resolve statistics issues

This patch resolves a number of issues in the statistics gathering of
the s2io driver.

On Xframe adapters, the received multicast statistics counter includes
pause frames which are not indicated to the driver.  This can cause
issues where the multicast packet count is higher than what has actually
been received, possibly higher than the number of packets received.

The driver software counters are replaced with the adapter hardware
statistics for rx_packets, rx_bytes, and tx_bytes.  It also uses the
overflow registers to determine if the statistics wrapped the 32bit
register (removing the window of having a statistic value less than the
previous call).  rx_length_errors statistic now includes undersized
packets in addition to oversized packets in its counting.  Finally,
rx_crc_errors are now being counted.

Signed-off-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/s2io.c | 101 +++++++++++++++++++++++++++++++++--------------------
 drivers/net/s2io.h |   4 ---
 2 files changed, 64 insertions(+), 41 deletions(-)

diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 668327ccd8d0..1d37f0c310ca 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -3130,7 +3130,6 @@ static void tx_intr_handler(struct fifo_info *fifo_data)
 		pkt_cnt++;
 
 		/* Updating the statistics block */
-		nic->dev->stats.tx_bytes += skb->len;
 		swstats->mem_freed += skb->truesize;
 		dev_kfree_skb_irq(skb);
 
@@ -4901,48 +4900,81 @@ static void s2io_updt_stats(struct s2io_nic *sp)
  *  Return value:
  *  pointer to the updated net_device_stats structure.
  */
-
 static struct net_device_stats *s2io_get_stats(struct net_device *dev)
 {
 	struct s2io_nic *sp = netdev_priv(dev);
-	struct config_param *config = &sp->config;
 	struct mac_info *mac_control = &sp->mac_control;
 	struct stat_block *stats = mac_control->stats_info;
-	int i;
+	u64 delta;
 
 	/* Configure Stats for immediate updt */
 	s2io_updt_stats(sp);