From d1510a2e5ab6cb3a67f1c55ca5e7a6d2c6dec340 Mon Sep 17 00:00:00 2001
From: Chris Gorman <chrisjohgorman@gmail.com>
Date: Wed, 12 Jul 2017 13:31:26 -0400
Subject: i2c: mux: pinctrl: mention correct module name in Kconfig help text

Kconfig says the resulting module is pinctrl-i2cmux, but the module when
built is i2c-mux-pinctrl.

Fixes: ae58d1e40698 ("i2c: Add generic I2C multiplexer using pinctrl API")
Signed-off-by: Chris Gorman <chrisjohgorman@gmail.com>
Signed-off-by: Peter Rosin <peda@axentia.se>
---
 drivers/i2c/muxes/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig
index 2c64d0e0740f..17121329bb79 100644
--- a/drivers/i2c/muxes/Kconfig
+++ b/drivers/i2c/muxes/Kconfig
@@ -83,7 +83,7 @@ config I2C_MUX_PINCTRL
 	  different sets of pins at run-time.
 
 	  This driver can also be built as a module. If so, the module will be
-	  called pinctrl-i2cmux.
+	  called i2c-mux-pinctrl.
 
 config I2C_MUX_REG
 	tristate "Register-based I2C multiplexer"
-- 
cgit v1.2.3


From 3aa0907675a38498d8f2d343e94207ad28a117cf Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Mon, 17 Jul 2017 20:20:08 +0200
Subject: mtd: nand: atmel: Fix DT backward compatibility in pmecc.c

PMECC caps extraction from old DT bindings is broken, thus leading to
erroneous EL registers offset, which in turn make HW ECC unusable on
sama5d2 when old bindings are in use.

Passing the NAND dev node instead of the NFC node to of_match_node()
solves the problem.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver")
Cc: <stable@vger.kernel.org>
Tested-by: Romain Izard <romain.izard.pro@gmail.com>
---
 drivers/mtd/nand/atmel/pmecc.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/drivers/mtd/nand/atmel/pmecc.c b/drivers/mtd/nand/atmel/pmecc.c
index 55a8ee5306ea..8c210a5776bc 100644
--- a/drivers/mtd/nand/atmel/pmecc.c
+++ b/drivers/mtd/nand/atmel/pmecc.c
@@ -945,6 +945,7 @@ struct atmel_pmecc *devm_atmel_pmecc_get(struct device *userdev)
 		 */
 		struct platform_device *pdev = to_platform_device(userdev);
 		const struct atmel_pmecc_caps *caps;
+		const struct of_device_id *match;
 
 		/* No PMECC engine available. */
 		if (!of_property_read_bool(userdev->of_node,
@@ -953,21 +954,11 @@ struct atmel_pmecc *devm_atmel_pmecc_get(struct device *userdev)
 
 		caps = &at91sam9g45_caps;
 
-		/*
-		 * Try to find the NFC subnode and extract the associated caps
-		 * from there.
-		 */
-		np = of_find_compatible_node(userdev->of_node, NULL,
-					     "atmel,sama5d3-nfc");
-		if (np) {
-			const struct of_device_id *match;
-
-			match = of_match_node(atmel_pmecc_legacy_match, np);
-			if (match && match->data)
-				caps = match->data;
-
-			of_node_put(np);
-		}
+		/* Find the caps associated to the NAND dev node. */
+		match = of_match_node(atmel_pmecc_legacy_match,
+				      userdev->of_node);
+		if (match && match->data)
+			caps = match->data;
 
 		pmecc = atmel_pmecc_create(pdev, caps, 1, 2);
 	}
-- 
cgit v1.2.3


From b82d6cb41eb54bf1c0e5d2ae73f49a4dff54c54b Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 21 Jul 2017 10:35:31 +0200
Subject: xtensa: remove wrapper header for asm/device.h

xtensa's asm/device.h is a verbatim copy of asm-generic/device.h and
does not add any arch specific extensions. Thus, use the asm-generic
header directly.

Acked-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/Kbuild   |  1 +
 arch/xtensa/include/asm/device.h | 15 ---------------
 2 files changed, 1 insertion(+), 15 deletions(-)
 delete mode 100644 arch/xtensa/include/asm/device.h

diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 2d716ebc5a5e..a2fdabfce43a 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -1,5 +1,6 @@
 generic-y += bug.h
 generic-y += clkdev.h
+generic-y += device.h
 generic-y += div64.h
 generic-y += dma-contiguous.h
 generic-y += emergency-restart.h
diff --git a/arch/xtensa/include/asm/device.h b/arch/xtensa/include/asm/device.h
deleted file mode 100644
index 1deeb8ebbb1b..000000000000
--- a/arch/xtensa/include/asm/device.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#ifndef _ASM_XTENSA_DEVICE_H
-#define _ASM_XTENSA_DEVICE_H
-
-struct dev_archdata {
-};
-
-struct pdev_archdata {
-};
-
-#endif /* _ASM_XTENSA_DEVICE_H */
-- 
cgit v1.2.3


From 536dcc9c34035778892a7e3cbf45166ce73f8d34 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 21 Jul 2017 10:44:07 +0200
Subject: xtensa: remove wrapper header for asm/param.h

xtensa's asm/device.h is a verbatim copy of asm-generic/device.h and
does not add any arch specific extensions. Thus, use the asm-generic
header directly.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/Kbuild  |  1 +
 arch/xtensa/include/asm/param.h | 18 ------------------
 2 files changed, 1 insertion(+), 18 deletions(-)
 delete mode 100644 arch/xtensa/include/asm/param.h

diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index a2fdabfce43a..dff7cc39437c 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -18,6 +18,7 @@ generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
+generic-y += param.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += rwsem.h
diff --git a/arch/xtensa/include/asm/param.h b/arch/xtensa/include/asm/param.h
deleted file mode 100644
index 0a70e780ef2a..000000000000
--- a/arch/xtensa/include/asm/param.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * include/asm-xtensa/param.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-#ifndef _XTENSA_PARAM_H
-#define _XTENSA_PARAM_H
-
-#include <uapi/asm/param.h>
-
-# define HZ		CONFIG_HZ	/* internal timer frequency */
-# define USER_HZ	100		/* for user interfaces in "ticks" */
-# define CLOCKS_PER_SEC (USER_HZ)	/* frequnzy at which times() counts */
-#endif /* _XTENSA_PARAM_H */
-- 
cgit v1.2.3


From edf3f301db7af7e784d06f7059dfc8a69359af13 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Mon, 10 Jul 2017 18:45:41 +0300
Subject: IB/ipoib: Fix race between light events and interface restart

A potential race between light_event and interface restart
may attach multicast group to an already attached QP.

Scenario:
light_event flow goes through ipoib_mcast_dev_flush function,
if a context switch occurs before calling ipoib_mcast_remove_list,
then we may face a situation where the broadcast of the priv is null
and the corresponding QP is not detached yet.
If an "interface restart" runs during the previous context switch,
the following scenario occurs:
When the device goes up, ipoib_ib_dev_up function will be called,
it will send a new registration request to the broadcast group and then
attach the group to the QP that was not detached before.

     IPOIB_FLUSH_LIGHT                                          INTERFACE RESTART

    __ipoib_ib_dev_flush                                                |
        |                                                               |
        |                                                               |
        |                                                               |
    ipoib_mcast_dev_flush                                               |
    Move mcast list and broadcast to remove_list                        |
        |                                                               |
        |                                                               |
    Context Switch-->                                                   |
        |                                                       ipoib_ib_dev_down
        |                                                               |
        |                                                               |
        |                                                       ipoib_ib_dev_up
        |                                                               |
        |                                                               |
        |                                                       ipoib_mcast_join_task
        |                                                       allocate new broadcast
        |                                                               |
        |                                                               |
        |                                                       Attach QP to multicast group
        |                                                               |
        |                                                               |
        |                                                       <--Context Switch
    ipoib_mcast_leave
    Detach QP from multicast group

Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib.h           | 1 +
 drivers/infiniband/ulp/ipoib/ipoib_main.c      | 1 +
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 ++
 3 files changed, 4 insertions(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ff50a7bd66d8..7ac25059c40f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -336,6 +336,7 @@ struct ipoib_dev_priv {
 	unsigned long flags;
 
 	struct rw_semaphore vlan_rwsem;
+	struct mutex mcast_mutex;
 
 	struct rb_root  path_tree;
 	struct list_head path_list;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 4ce315c92b48..144187b407bd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1877,6 +1877,7 @@ static void ipoib_build_priv(struct net_device *dev)
 	priv->dev = dev;
 	spin_lock_init(&priv->lock);
 	init_rwsem(&priv->vlan_rwsem);
+	mutex_init(&priv->mcast_mutex);
 
 	INIT_LIST_HEAD(&priv->path_list);
 	INIT_LIST_HEAD(&priv->child_intfs);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 057f58e6afca..0a0b2ce45cbc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -838,6 +838,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 	struct ipoib_mcast *mcast, *tmcast;
 	unsigned long flags;
 
+	mutex_lock(&priv->mcast_mutex);
 	ipoib_dbg_mcast(priv, "flushing multicast list\n");
 
 	spin_lock_irqsave(&priv->lock, flags);
@@ -865,6 +866,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 			wait_for_completion(&mcast->done);
 
 	ipoib_mcast_remove_list(&remove_list);
+	mutex_unlock(&priv->mcast_mutex);
 }
 
 static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
-- 
cgit v1.2.3


From 6bdc8de2e86e717124a715ecc480892a2c331ff5 Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Wed, 12 Jul 2017 10:40:25 +0300
Subject: IB/ipoib: Use cancel_delayed_work_sync when needed

The work mcast_task can re-queue itself, so instead of doing
cancel && flush_workqueue, that still can leave a queued task
on the air, use cancel_delayed_work_sync.

Also, no need to use lock over the cancel, the original lock was
due to bit assignment setting (IPOIB_MCAST_RUN) that is not in use
anymore.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 0a0b2ce45cbc..f80bf0f5d7cf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -684,15 +684,10 @@ void ipoib_mcast_start_thread(struct net_device *dev)
 int ipoib_mcast_stop_thread(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
-	unsigned long flags;
 
 	ipoib_dbg_mcast(priv, "stopping multicast thread\n");
 
-	spin_lock_irqsave(&priv->lock, flags);
-	cancel_delayed_work(&priv->mcast_task);
-	spin_unlock_irqrestore(&priv->lock, flags);
-
-	flush_workqueue(priv->wq);
+	cancel_delayed_work_sync(&priv->mcast_task);
 
 	return 0;
 }
-- 
cgit v1.2.3


From a08e1120627f72e9ed7c291e3b9f8dd29c1513ab Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Wed, 12 Jul 2017 13:11:54 +0300
Subject: IB/ipoib: Make sure no in-flight joins while leaving that mcast

While cleaning neighs and there is a send-only mcast neigh, the driver
should wait to finish its join process before trying to remove it.

Without this patch, we will see messages like: "ipoib_mcast_leave on an
in-flight join" and unexpected results in the join_complete.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index f80bf0f5d7cf..93e149efc1f5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -743,6 +743,14 @@ void ipoib_mcast_remove_list(struct list_head *remove_list)
 {
 	struct ipoib_mcast *mcast, *tmcast;
 
+	/*
+	 * make sure the in-flight joins have finished before we attempt
+	 * to leave
+	 */
+	list_for_each_entry_safe(mcast, tmcast, remove_list, list)
+		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+			wait_for_completion(&mcast->done);
+
 	list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
 		ipoib_mcast_leave(mcast->dev, mcast);
 		ipoib_mcast_free(mcast);
@@ -852,14 +860,6 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/*
-	 * make sure the in-flight joins have finished before we attempt
-	 * to leave
-	 */
-	list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-			wait_for_completion(&mcast->done);
-
 	ipoib_mcast_remove_list(&remove_list);
 	mutex_unlock(&priv->mcast_mutex);
 }
@@ -979,14 +979,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 	netif_addr_unlock(dev);
 	local_irq_restore(flags);
 
-	/*
-	 * make sure the in-flight joins have finished before we attempt
-	 * to leave
-	 */
-	list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-			wait_for_completion(&mcast->done);
-
 	ipoib_mcast_remove_list(&remove_list);
 
 	/*
-- 
cgit v1.2.3


From 11f74b40359b19f760964e71d04882a6caf530cc Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 13 Jul 2017 11:27:12 +0300
Subject: IB/ipoib: Prevent setting negative values to max_nonsrq_conn_qp

Don't allow negative values to max_nonsrq_conn_qp. There is no functional
impact on a negative value but it is logicically incorrect.

Fixes: 68e995a29572 ("IPoIB/cm: Add connected mode support for devices without SRQs")
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 144187b407bd..8b7ec15a9d6e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2366,6 +2366,7 @@ static int __init ipoib_init_module(void)
 	ipoib_sendq_size = max3(ipoib_sendq_size, 2 * MAX_SEND_CQE, IPOIB_MIN_QUEUE_SIZE);
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 	ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
+	ipoib_max_conn_qp = max(ipoib_max_conn_qp, 0);
 #endif
 
 	/*
-- 
cgit v1.2.3


From d2e46fccc3e3d73a741efe433f00960331280696 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Sun, 16 Jul 2017 11:33:01 +0300
Subject: IB/ipoib: Set IPOIB_NEIGH_TBL_FLUSH after flushed completion
 initialization

Set IPOIB_NEIGH_TBL_FLUSH bit after initializing the neighbor
flushed completion, otherwise the garbage collector may signal
a completion while it is not initialized yet.

Fixes: b63b70d87741 ("IPoIB: Use a private hash table for path lookup in xmit path")
Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 8b7ec15a9d6e..f4403c52cd67 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1560,6 +1560,7 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
 	int i, wait_flushed = 0;
 
 	init_completion(&priv->ntbl.flushed);
+	set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
 
 	spin_lock_irqsave(&priv->lock, flags);
 
@@ -1604,7 +1605,6 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
 
 	ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
 	init_completion(&priv->ntbl.deleted);
-	set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
 
 	/* Stop GC if called at init fail need to cancel work */
 	stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-- 
cgit v1.2.3


From 4829d964dfb027558c732cfa0d13b716ab3f0838 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Mon, 10 Jul 2017 18:12:43 +0300
Subject: IB/ipoib: Add multicast packets statistics

Update the multicast counter when multicast packets are received and
provide this information through ethtool support.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 3 ++-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c      | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 7871379342f4..184a22f48027 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -52,7 +52,8 @@ static const struct ipoib_stats ipoib_gstrings_stats[] = {
 	IPOIB_NETDEV_STAT(tx_bytes),
 	IPOIB_NETDEV_STAT(tx_errors),
 	IPOIB_NETDEV_STAT(rx_dropped),
-	IPOIB_NETDEV_STAT(tx_dropped)
+	IPOIB_NETDEV_STAT(tx_dropped),
+	IPOIB_NETDEV_STAT(multicast),
 };
 
 #define IPOIB_GLOBAL_STATS_LEN	ARRAY_SIZE(ipoib_gstrings_stats)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 57a9655e844d..02eda1f53a67 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -256,6 +256,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 
 	++dev->stats.rx_packets;
 	dev->stats.rx_bytes += skb->len;
+	if (skb->pkt_type == PACKET_MULTICAST)
+		dev->stats.multicast++;
 
 	skb->dev = dev;
 	if ((dev->features & NETIF_F_RXCSUM) &&
-- 
cgit v1.2.3


From eb54714ddcb2462d4d4b8aa78d028b61e217a835 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Sun, 2 Jul 2017 15:05:59 +0300
Subject: IB/ipoib: Add get statistics support to SRIOV VF

Add SRIOV VF support to get traffic statistics.

Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index f4403c52cd67..24fa87fe0952 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1847,6 +1847,7 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
 	.ndo_tx_timeout		 = ipoib_timeout,
 	.ndo_set_rx_mode	 = ipoib_set_mcast_list,
 	.ndo_get_iflink		 = ipoib_get_iflink,
+	.ndo_get_stats64	 = ipoib_get_stats,
 };
 
 void ipoib_setup_common(struct net_device *dev)
-- 
cgit v1.2.3


From dc892e17bbae670a3d7aa6ab8bd1033b15b24645 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 13 Jul 2017 13:34:19 +0300
Subject: IB/ipoib: Clean error paths in add port

Refactor error paths in ipoib_add_port() function. The code flow
ensures that the function terminates on every error flow and it makes
redundant all "else" cases.

The functions are called during the flow are returning "result < 0", in
case of error, so there is no need to check it explicitly.

Fixes: 58e9cc90cda7 ("IB/IPoIB: Fix bad error flow in ipoib_add_port()")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 24fa87fe0952..6c77df34869d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2175,14 +2175,14 @@ static struct net_device *ipoib_add_port(const char *format,
 	priv->dev->dev_id = port - 1;
 
 	result = ib_query_port(hca, port, &attr);
-	if (!result)
-		priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
-	else {
+	if (result) {
 		printk(KERN_WARNING "%s: ib_query_port %d failed\n",
 		       hca->name, port);
 		goto device_init_failed;
 	}
 
+	priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
+
 	/* MTU will be reset when mcast join happens */
 	priv->dev->mtu  = IPOIB_UD_MTU(priv->max_ib_mtu);
 	priv->mcast_mtu  = priv->admin_mtu = priv->dev->mtu;
@@ -2213,12 +2213,14 @@ static struct net_device *ipoib_add_port(const char *format,
 		printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
 		       hca->name, port, result);
 		goto device_init_failed;
-	} else
-		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
+	}
+
+	memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw,
+	       sizeof(union ib_gid));
 	set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
 
 	result = ipoib_dev_init(priv->dev, hca, port);
-	if (result < 0) {
+	if (result) {
 		printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
 		       hca->name, port, result);
 		goto device_init_failed;
-- 
cgit v1.2.3


From 1b355094b308f3377c8f574ce86135ee159c6285 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sat, 15 Jul 2017 16:26:55 +0300
Subject: IB/ipoib: Remove double pointer assigning

There is no need to assign "p" pointer twice.

This patch fixes the following smatch warning:
drivers/infiniband/ulp/ipoib/ipoib_cm.c:517 ipoib_cm_rx_handler() warn:
	missing break? reassigning 'p->id'

Fixes: 839fcaba355a ("IPoIB: Connected mode experimental support")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index f87d104837dc..d69410c2ed97 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -511,7 +511,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
 	case IB_CM_REQ_RECEIVED:
 		return ipoib_cm_req_handler(cm_id, event);
 	case IB_CM_DREQ_RECEIVED:
-		p = cm_id->context;
 		ib_send_cm_drep(cm_id, NULL, 0);
 		/* Fall through */
 	case IB_CM_REJ_RECEIVED:
-- 
cgit v1.2.3


From b287b76e89503ef1d403cc5cc8bd74b035d25bfa Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 23 Jul 2017 10:46:14 +0300
Subject: Revert "IB/core: Allow QP state transition from reset to error"

The commit ebc9ca43e1d5 ("IB/core: Allow QP state transition from reset to error")
allowed transition from Reset to Error state for the QPs. This behavior
doesn't follow the IBTA specification 1.3, which in 10.3.1 QUEUE PAIR AND
EE CONTEXT STATES section.

The quote from the spec:
"An error can be forced from any state, except Reset, with
the Modify QP/EE Verb."

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/verbs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index fb98ed67d5bc..7f8fe443df46 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -895,7 +895,6 @@ static const struct {
 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
 	[IB_QPS_RESET] = {
 		[IB_QPS_RESET] = { .valid = 1 },
-		[IB_QPS_ERR] =   { .valid = 1 },
 		[IB_QPS_INIT]  = {
 			.valid = 1,
 			.req_param = {
-- 
cgit v1.2.3


From 5dc78ad1904db597bdb4427f3ead437aae86f54c Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Thu, 13 Jul 2017 14:29:08 +0300
Subject: IB/ipoib: Notify on modify QP failure only when relevant

Modify QP can fail and it can be acceptable, like when moving from RST to
ERR state, all the rest are not acceptable and a message to the log
should be printed.

The current code prints on all failures and many messages like:
"Failed to modify QP to ERROR state" appear, even when supported by the
state machine of the QP object.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_ib.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 02eda1f53a67..2e075377242e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -711,6 +711,27 @@ static int recvs_pending(struct net_device *dev)
 	return pending;
 }
 
+static void check_qp_movement_and_print(struct ipoib_dev_priv *priv,
+					struct ib_qp *qp,
+					enum ib_qp_state new_state)
+{
+	struct ib_qp_attr qp_attr;
+	struct ib_qp_init_attr query_init_attr;
+	int ret;
+
+	ret = ib_query_qp(qp, &qp_attr, IB_QP_STATE, &query_init_attr);
+	if (ret) {
+		ipoib_warn(priv, "%s: Failed to query QP\n", __func__);
+		return;
+	}
+	/* print according to the new-state and the previous state.*/
+	if (new_state == IB_QPS_ERR && qp_attr.qp_state == IB_QPS_RESET)
+		ipoib_dbg(priv, "Failed modify QP, IB_QPS_RESET to IB_QPS_ERR, acceptable\n");
+	else
+		ipoib_warn(priv, "Failed to modify QP to state: %d from state: %d\n",
+			   new_state, qp_attr.qp_state);
+}
+
 int ipoib_ib_dev_stop_default(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -730,7 +751,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev)
 	 */
 	qp_attr.qp_state = IB_QPS_ERR;
 	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
-		ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
+		check_qp_movement_and_print(priv, priv->qp, IB_QPS_ERR);
 
 	/* Wait for all sends and receives to complete */
 	begin = jiffies;
-- 
cgit v1.2.3


From 8addebc14a322fa8ca67cd57c6038069acde8ddc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 24 Jul 2017 12:52:56 +0200
Subject: scsi: bnx2fc: Plug CPU hotplug race

bnx2fc_process_new_cqes() has protection against CPU hotplug, which relies
on the per cpu thread pointer. This protection is racy because it happens
only partially with the per cpu fp_work_lock held.

If the CPU is unplugged after the lock is dropped, the wakeup code can
dereference a NULL pointer or access freed and potentially reused memory.

Restructure the code so the thread check and wakeup happens with the
fp_work_lock held.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2fc/bnx2fc_hwi.c | 45 ++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c
index 913c750205ce..26de61d65a4d 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c
@@ -1008,6 +1008,28 @@ static struct bnx2fc_work *bnx2fc_alloc_work(struct bnx2fc_rport *tgt, u16 wqe)
 	return work;
 }
 
+/* Pending work request completion */
+static void bnx2fc_pending_work(struct bnx2fc_rport *tgt, unsigned int wqe)
+{
+	unsigned int cpu = wqe % num_possible_cpus();
+	struct bnx2fc_percpu_s *fps;
+	struct bnx2fc_work *work;
+
+	fps = &per_cpu(bnx2fc_percpu, cpu);
+	spin_lock_bh(&fps->fp_work_lock);
+	if (fps->iothread) {
+		work = bnx2fc_alloc_work(tgt, wqe);
+		if (work) {
+			list_add_tail(&work->list, &fps->work_list);
+			wake_up_process(fps->iothread);
+			spin_unlock_bh(&fps->fp_work_lock);
+			return;
+		}
+	}
+	spin_unlock_bh(&fps->fp_work_lock);
+	bnx2fc_process_cq_compl(tgt, wqe);
+}
+
 int bnx2fc_process_new_cqes(struct bnx2fc_rport *tgt)
 {
 	struct fcoe_cqe *cq;
@@ -1042,28 +1064,7 @@ int bnx2fc_process_new_cqes(struct bnx2fc_rport *tgt)
 			/* Unsolicited event notification */
 			bnx2fc_process_unsol_compl(tgt, wqe);
 		} else {
-			/* Pending work request completion */
-			struct bnx2fc_work *work = NULL;
-			struct bnx2fc_percpu_s *fps = NULL;
-			unsigned int cpu = wqe % num_possible_cpus();
-
-			fps = &per_cpu(bnx2fc_percpu, cpu);
-			spin_lock_bh(&fps->fp_work_lock);
-			if (unlikely(!fps->iothread))
-				goto unlock;
-
-			work = bnx2fc_alloc_work(tgt, wqe);
-			if (work)
-				list_add_tail(&work->list,
-					      &fps->work_list);
-unlock:
-			spin_unlock_bh(&fps->fp_work_lock);
-
-			/* Pending work request completion */
-			if (fps->iothread && work)
-				wake_up_process(fps->iothread);
-			else
-				bnx2fc_process_cq_compl(tgt, wqe);
+			bnx2fc_pending_work(tgt, wqe);
 			num_free_sqes++;
 		}
 		cqe++;
-- 
cgit v1.2.3


From 2c2b66ae9d11d99f5f6d7010f0d46401ed9031ce Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 24 Jul 2017 12:52:57 +0200
Subject: scsi: bnx2fc: Prevent recursive cpuhotplug locking

The BNX2FC module init/exit code installs/removes the hotplug callbacks with
the cpu hotplug lock held. This worked with the old CPU locking
implementation which allowed recursive locking, but with the new percpu
rwsem based mechanism this is not longer allowed.

Use the _cpuslocked() variants to fix this.

Reported-by: kernel test robot <fengguang.wu@intel.com>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 7dfe709a7138..c4ebf8c5d884 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -2766,15 +2766,16 @@ static int __init bnx2fc_mod_init(void)
 	for_each_online_cpu(cpu)
 		bnx2fc_percpu_thread_create(cpu);
 
-	rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
-				       "scsi/bnx2fc:online",
-				       bnx2fc_cpu_online, NULL);
+	rc = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+						  "scsi/bnx2fc:online",
+						  bnx2fc_cpu_online, NULL);
 	if (rc < 0)
 		goto stop_threads;
 	bnx2fc_online_state = rc;
 
-	cpuhp_setup_state_nocalls(CPUHP_SCSI_BNX2FC_DEAD, "scsi/bnx2fc:dead",
-				  NULL, bnx2fc_cpu_dead);
+	cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD,
+					     "scsi/bnx2fc:dead",
+					     NULL, bnx2fc_cpu_dead);
 	put_online_cpus();
 
 	cnic_register_driver(CNIC_ULP_FCOE, &bnx2fc_cnic_cb);
@@ -2850,8 +2851,8 @@ static void __exit bnx2fc_mod_exit(void)
 		bnx2fc_percpu_thread_destroy(cpu);
 	}
 
-	cpuhp_remove_state_nocalls(bnx2fc_online_state);
-	cpuhp_remove_state_nocalls(CPUHP_SCSI_BNX2FC_DEAD);
+	cpuhp_remove_state_nocalls_cpuslocked(bnx2fc_online_state);
+	cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD);
 
 	put_online_cpus();
 
-- 
cgit v1.2.3


From 2fa2fa1ae6b42fc4c4995fdbf8fd7df96bb25ba4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 24 Jul 2017 12:52:58 +0200
Subject: scsi: bnx2i: Prevent recursive cpuhotplug locking

The BNX2I module init/exit code installs/removes the hotplug callbacks with
the cpu hotplug lock held. This worked with the old CPU locking
implementation which allowed recursive locking, but with the new percpu
rwsem based mechanism this is not longer allowed.

Use the _cpuslocked() variants to fix this.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2i/bnx2i_init.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c
index 86afc002814c..7487b653e799 100644
--- a/drivers/scsi/bnx2i/bnx2i_init.c
+++ b/drivers/scsi/bnx2i/bnx2i_init.c
@@ -516,15 +516,16 @@ static int __init bnx2i_mod_init(void)
 	for_each_online_cpu(cpu)
 		bnx2i_percpu_thread_create(cpu);
 
-	err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
-				       "scsi/bnx2i:online",
-				       bnx2i_cpu_online, NULL);
+	err = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+						   "scsi/bnx2i:online",
+						   bnx2i_cpu_online, NULL);
 	if (err < 0)
 		goto remove_threads;
 	bnx2i_online_state = err;
 
-	cpuhp_setup_state_nocalls(CPUHP_SCSI_BNX2I_DEAD, "scsi/bnx2i:dead",
-				  NULL, bnx2i_cpu_dead);
+	cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD,
+					     "scsi/bnx2i:dead",
+					     NULL, bnx2i_cpu_dead);
 	put_online_cpus();
 	return 0;
 
@@ -574,8 +575,8 @@ static void __exit bnx2i_mod_exit(void)
 	for_each_online_cpu(cpu)
 		bnx2i_percpu_thread_destroy(cpu);
 
-	cpuhp_remove_state_nocalls(bnx2i_online_state);
-	cpuhp_remove_state_nocalls(CPUHP_SCSI_BNX2I_DEAD);
+	cpuhp_remove_state_nocalls_cpuslocked(bnx2i_online_state);
+	cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD);
 	put_online_cpus();
 
 	iscsi_unregister_transport(&bnx2i_iscsi_transport);
-- 
cgit v1.2.3


From 1937f8a29f4a650bc27e0311b43b53509a34fd22 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 24 Jul 2017 12:52:59 +0200
Subject: scsi: bnx2fc: Simplify CPU hotplug code

The CPU hotplug related code of this driver can be simplified by:

1) Consolidating the callbacks into a single state. The CPU thread can be
   torn down on the CPU which goes offline. There is no point in delaying
   that to the CPU dead state

2) Let the core code invoke the online/offline callbacks and remove the
   extra for_each_online_cpu() loops.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 69 +++++++++------------------------------
 include/linux/cpuhotplug.h        |  1 -
 2 files changed, 15 insertions(+), 55 deletions(-)

diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index c4ebf8c5d884..6844ba361616 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -2624,12 +2624,11 @@ static struct fcoe_transport bnx2fc_transport = {
 };
 
 /**
- * bnx2fc_percpu_thread_create - Create a receive thread for an
- *				 online CPU
+ * bnx2fc_cpu_online - Create a receive thread for an  online CPU
  *
  * @cpu: cpu index for the online cpu
  */
-static void bnx2fc_percpu_thread_create(unsigned int cpu)
+static int bnx2fc_cpu_online(unsigned int cpu)
 {
 	struct bnx2fc_percpu_s *p;
 	struct task_struct *thread;
@@ -2639,15 +2638,17 @@ static void bnx2fc_percpu_thread_create(unsigned int cpu)
 	thread = kthread_create_on_node(bnx2fc_percpu_io_thread,
 					(void *)p, cpu_to_node(cpu),
 					"bnx2fc_thread/%d", cpu);
+	if (IS_ERR(thread))
+		return PTR_ERR(thread);
+
 	/* bind thread to the cpu */
-	if (likely(!IS_ERR(thread))) {
-		kthread_bind(thread, cpu);
-		p->iothread = thread;
-		wake_up_process(thread);
-	}
+	kthread_bind(thread, cpu);
+	p->iothread = thread;
+	wake_up_process(thread);
+	return 0;
 }
 
-static void bnx2fc_percpu_thread_destroy(unsigned int cpu)
+static int bnx2fc_cpu_offline(unsigned int cpu)
 {
 	struct bnx2fc_percpu_s *p;
 	struct task_struct *thread;
@@ -2661,7 +2662,6 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu)
 	thread = p->iothread;
 	p->iothread = NULL;
 
-
 	/* Free all work in the list */
 	list_for_each_entry_safe(work, tmp, &p->work_list, list) {
 		list_del_init(&work->list);
@@ -2673,20 +2673,6 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu)
 
 	if (thread)
 		kthread_stop(thread);
-}
-
-
-static int bnx2fc_cpu_online(unsigned int cpu)
-{
-	printk(PFX "CPU %x online: Create Rx thread\n", cpu);
-	bnx2fc_percpu_thread_create(cpu);
-	return 0;
-}
-
-static int bnx2fc_cpu_dead(unsigned int cpu)
-{
-	printk(PFX "CPU %x offline: Remove Rx thread\n", cpu);
-	bnx2fc_percpu_thread_destroy(cpu);
 	return 0;
 }
 
@@ -2761,31 +2747,16 @@ static int __init bnx2fc_mod_init(void)
 		spin_lock_init(&p->fp_work_lock);
 	}
 
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		bnx2fc_percpu_thread_create(cpu);
-
-	rc = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
-						  "scsi/bnx2fc:online",
-						  bnx2fc_cpu_online, NULL);
+	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/bnx2fc:online",
+			       bnx2fc_cpu_online, bnx2fc_cpu_offline);
 	if (rc < 0)
-		goto stop_threads;
+		goto stop_thread;
 	bnx2fc_online_state = rc;
 
-	cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD,
-					     "scsi/bnx2fc:dead",
-					     NULL, bnx2fc_cpu_dead);
-	put_online_cpus();
-
 	cnic_register_driver(CNIC_ULP_FCOE, &bnx2fc_cnic_cb);
-
 	return 0;
 
-stop_threads:
-	for_each_online_cpu(cpu)
-		bnx2fc_percpu_thread_destroy(cpu);
-	put_online_cpus();
+stop_thread:
 	kthread_stop(l2_thread);
 free_wq:
 	destroy_workqueue(bnx2fc_wq);
@@ -2804,7 +2775,6 @@ static void __exit bnx2fc_mod_exit(void)
 	struct fcoe_percpu_s *bg;
 	struct task_struct *l2_thread;
 	struct sk_buff *skb;
-	unsigned int cpu = 0;
 
 	/*
 	 * NOTE: Since cnic calls register_driver routine rtnl_lock,
@@ -2845,16 +2815,7 @@ static void __exit bnx2fc_mod_exit(void)
 	if (l2_thread)
 		kthread_stop(l2_thread);
 
-	get_online_cpus();
-	/* Destroy per cpu threads */
-	for_each_online_cpu(cpu) {
-		bnx2fc_percpu_thread_destroy(cpu);
-	}
-
-	cpuhp_remove_state_nocalls_cpuslocked(bnx2fc_online_state);
-	cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD);
-
-	put_online_cpus();
+	cpuhp_remove_state(bnx2fc_online_state);
 
 	destroy_workqueue(bnx2fc_wq);
 	/*
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index b56573bf440d..2e7b1731ad12 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -39,7 +39,6 @@ enum cpuhp_state {
 	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_IOMMU_INTEL_DEAD,
 	CPUHP_LUSTRE_CFS_DEAD,
-	CPUHP_SCSI_BNX2FC_DEAD,
 	CPUHP_SCSI_BNX2I_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
-- 
cgit v1.2.3


From f9f22a86912f9d36b50e9b3b383fabfb9f22dd46 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 24 Jul 2017 12:53:00 +0200
Subject: scsi: bnx2i: Simplify cpu hotplug code

The CPU hotplug related code of this driver can be simplified by:

1) Consolidating the callbacks into a single state. The CPU thread can be
   torn down on the CPU which goes offline. There is no point in delaying
   that to the CPU dead state

2) Let the core code invoke the online/offline callbacks and remove the
   extra for_each_online_cpu() loops.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2i/bnx2i_init.c | 65 ++++++++++-------------------------------
 include/linux/cpuhotplug.h      |  1 -
 2 files changed, 15 insertions(+), 51 deletions(-)

diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c
index 7487b653e799..4ebcda8d9500 100644
--- a/drivers/scsi/bnx2i/bnx2i_init.c
+++ b/drivers/scsi/bnx2i/bnx2i_init.c
@@ -404,12 +404,11 @@ int bnx2i_get_stats(void *handle)
 
 
 /**
- * bnx2i_percpu_thread_create - Create a receive thread for an
- *				online CPU
+ * bnx2i_cpu_online - Create a receive thread for an online CPU
  *
  * @cpu:	cpu index for the online cpu
  */
-static void bnx2i_percpu_thread_create(unsigned int cpu)
+static int bnx2i_cpu_online(unsigned int cpu)
 {
 	struct bnx2i_percpu_s *p;
 	struct task_struct *thread;
@@ -419,16 +418,17 @@ static void bnx2i_percpu_thread_create(unsigned int cpu)
 	thread = kthread_create_on_node(bnx2i_percpu_io_thread, (void *)p,
 					cpu_to_node(cpu),
 					"bnx2i_thread/%d", cpu);
+	if (IS_ERR(thread))
+		return PTR_ERR(thread);
+
 	/* bind thread to the cpu */
-	if (likely(!IS_ERR(thread))) {
-		kthread_bind(thread, cpu);
-		p->iothread = thread;
-		wake_up_process(thread);
-	}
+	kthread_bind(thread, cpu);
+	p->iothread = thread;
+	wake_up_process(thread);
+	return 0;
 }
 
-
-static void bnx2i_percpu_thread_destroy(unsigned int cpu)
+static int bnx2i_cpu_offline(unsigned int cpu)
 {
 	struct bnx2i_percpu_s *p;
 	struct task_struct *thread;
@@ -451,19 +451,6 @@ static void bnx2i_percpu_thread_destroy(unsigned int cpu)
 	spin_unlock_bh(&p->p_work_lock);
 	if (thread)
 		kthread_stop(thread);
-}
-
-static int bnx2i_cpu_online(unsigned int cpu)
-{
-	pr_info("bnx2i: CPU %x online: Create Rx thread\n", cpu);
-	bnx2i_percpu_thread_create(cpu);
-	return 0;
-}
-
-static int bnx2i_cpu_dead(unsigned int cpu)
-{
-	pr_info("CPU %x offline: Remove Rx thread\n", cpu);
-	bnx2i_percpu_thread_destroy(cpu);
 	return 0;
 }
 
@@ -511,28 +498,14 @@ static int __init bnx2i_mod_init(void)
 		p->iothread = NULL;
 	}
 
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		bnx2i_percpu_thread_create(cpu);
-
-	err = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
-						   "scsi/bnx2i:online",
-						   bnx2i_cpu_online, NULL);
+	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/bnx2i:online",
+				bnx2i_cpu_online, bnx2i_cpu_offline);
 	if (err < 0)
-		goto remove_threads;
+		goto unreg_driver;
 	bnx2i_online_state = err;
-
-	cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD,
-					     "scsi/bnx2i:dead",
-					     NULL, bnx2i_cpu_dead);
-	put_online_cpus();
 	return 0;
 
-remove_threads:
-	for_each_online_cpu(cpu)
-		bnx2i_percpu_thread_destroy(cpu);
-	put_online_cpus();
+unreg_driver:
 	cnic_unregister_driver(CNIC_ULP_ISCSI);
 unreg_xport:
 	iscsi_unregister_transport(&bnx2i_iscsi_transport);
@@ -552,7 +525,6 @@ out:
 static void __exit bnx2i_mod_exit(void)
 {
 	struct bnx2i_hba *hba;
-	unsigned cpu = 0;
 
 	mutex_lock(&bnx2i_dev_lock);
 	while (!list_empty(&adapter_list)) {
@@ -570,14 +542,7 @@ static void __exit bnx2i_mod_exit(void)
 	}
 	mutex_unlock(&bnx2i_dev_lock);
 
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		bnx2i_percpu_thread_destroy(cpu);
-
-	cpuhp_remove_state_nocalls_cpuslocked(bnx2i_online_state);
-	cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD);
-	put_online_cpus();
+	cpuhp_remove_state(bnx2i_online_state);
 
 	iscsi_unregister_transport(&bnx2i_iscsi_transport);
 	cnic_unregister_driver(CNIC_ULP_ISCSI);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 2e7b1731ad12..82b30e638430 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -39,7 +39,6 @@ enum cpuhp_state {
 	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_IOMMU_INTEL_DEAD,
 	CPUHP_LUSTRE_CFS_DEAD,
-	CPUHP_SCSI_BNX2I_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
-- 
cgit v1.2.3


From 722477c4f2c50bc8d800d293a3bc5aa843f16e8d Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Date: Tue, 25 Jul 2017 11:19:21 +0200
Subject: scsi: qedf: Limit number of CQs

FCOE offloading failed with:

[qed_sp_fcoe_func_start:150(sp-0-3b:00.02)]Cannot satisfy CQ amount. CQs
		 requested 8, CQs available 6. Aborting function start
[qed_fcoe_start:821()]Failed to start fcoe
[__qedf_probe:3041]:6: Cannot start FCoE function.

The reason is a newly introduced check in the qed main part. This change
also provides the information about how many CQs are available, so we
simply limit the number of requested CQs..

Fixes: 3c5da9427802 ("qed: Share additional information with qedf")
Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf.h      |  3 ++-
 drivers/scsi/qedf/qedf_main.c | 20 +++++++++-----------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h
index 4d038926a455..351f06dfc5a0 100644
--- a/drivers/scsi/qedf/qedf.h
+++ b/drivers/scsi/qedf/qedf.h
@@ -528,7 +528,8 @@ struct fip_vlan {
 #define QEDF_WRITE                    (1 << 0)
 #define MAX_FIBRE_LUNS			0xffffffff
 
-#define QEDF_MAX_NUM_CQS		8
+#define MIN_NUM_CPUS_MSIX(x)	min_t(u32, x->dev_info.num_cqs, \
+					num_online_cpus())
 
 /*
  * PCI function probe defines
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 7786c97e033f..1d13c9ca517d 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2760,11 +2760,9 @@ static int qedf_set_fcoe_pf_param(struct qedf_ctx *qedf)
 	 * we allocation is the minimum off:
 	 *
 	 * Number of CPUs
-	 * Number of MSI-X vectors
-	 * Max number allocated in hardware (QEDF_MAX_NUM_CQS)
+	 * Number allocated by qed for our PCI function
 	 */
-	qedf->num_queues = min((unsigned int)QEDF_MAX_NUM_CQS,
-	    num_online_cpus());
+	qedf->num_queues = MIN_NUM_CPUS_MSIX(qedf);
 
 	QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Number of CQs is %d.\n",
 		   qedf->num_queues);
@@ -2962,6 +2960,13 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
 		goto err1;
 	}
 
+	/* Learn information crucial for qedf to progress */
+	rc = qed_ops->fill_dev_info(qedf->cdev, &qedf->dev_info);
+	if (rc) {
+		QEDF_ERR(&(qedf->dbg_ctx), "Failed to dev info.\n");
+		goto err1;
+	}
+
 	/* queue allocation code should come here
 	 * order should be
 	 * 	slowpath_start
@@ -2977,13 +2982,6 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
 	}
 	qed_ops->common->update_pf_params(qedf->cdev, &qedf->pf_params);
 
-	/* Learn information crucial for qedf to progress */
-	rc = qed_ops->fill_dev_info(qedf->cdev, &qedf->dev_info);
-	if (rc) {
-		QEDF_ERR(&(qedf->dbg_ctx), "Failed to dev info.\n");
-		goto err1;
-	}
-
 	/* Record BDQ producer doorbell addresses */
 	qedf->bdq_primary_prod = qedf->dev_info.primary_dbq_rq_addr;
 	qedf->bdq_secondary_prod = qedf->dev_info.secondary_bdq_rq_addr;
-- 
cgit v1.2.3


From e6fd916a625110d75bd4d15b8488df44cf41c9fc Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 25 Jul 2017 22:49:55 +0300
Subject: scsi: aacraid: reading out of bounds

"qd.id" comes directly from the copy_from_user() on the line before so
we should verify that it's within bounds.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aacraid/aachba.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 707ee2f5954d..4591113c49de 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -3198,10 +3198,11 @@ static int query_disk(struct aac_dev *dev, void __user *arg)
 		return -EBUSY;
 	if (copy_from_user(&qd, arg, sizeof (struct aac_query_disk)))
 		return -EFAULT;
-	if (qd.cnum == -1)
+	if (qd.cnum == -1) {
+		if (qd.id < 0 || qd.id >= dev->maximum_num_containers)
+			return -EINVAL;
 		qd.cnum = qd.id;
-	else if ((qd.bus == -1) && (qd.id == -1) && (qd.lun == -1))
-	{
+	} else if ((qd.bus == -1) && (qd.id == -1) && (qd.lun == -1)) {
 		if (qd.cnum < 0 || qd.cnum >= dev->maximum_num_containers)
 			return -EINVAL;
 		qd.instance = dev->scsi_host_ptr->host_no;
-- 
cgit v1.2.3


From f930c7043663188429cd9b254e9d761edfc101ce Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Thu, 27 Jul 2017 09:11:26 +0200
Subject: scsi: sg: only check for dxfer_len greater than 256M

Don't make any assumptions on the sg_io_hdr_t::dxfer_direction or the
sg_io_hdr_t::dxferp in order to determine if it is a valid request. The
only way we can check for bad requests is by checking if the length
exceeds 256M.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Fixes: 28676d869bbb (scsi: sg: check for valid direction before starting the
request)
Reported-by: Jason L Tibbitts III <tibbs@math.uh.edu>
Tested-by: Jason L Tibbitts III <tibbs@math.uh.edu>
Suggested-by: Doug Gilbert <dgilbert@interlog.com>
Cc: Doug Gilbert <dgilbert@interlog.com>
Cc: <stable@vger.kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sg.c | 31 +------------------------------
 1 file changed, 1 insertion(+), 30 deletions(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 4fe606b000b4..d7ff71e0c85c 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -751,35 +751,6 @@ sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf,
 	return count;
 }
 
-static bool sg_is_valid_dxfer(sg_io_hdr_t *hp)
-{
-	switch (hp->dxfer_direction) {
-	case SG_DXFER_NONE:
-		if (hp->dxferp || hp->dxfer_len > 0)
-			return false;
-		return true;
-	case SG_DXFER_FROM_DEV:
-		/*
-		 * for SG_DXFER_FROM_DEV we always set dxfer_len to > 0. dxferp
-		 * can either be NULL or != NULL so there's no point in checking
-		 * it either. So just return true.
-		 */
-		return true;
-	case SG_DXFER_TO_DEV:
-	case SG_DXFER_TO_FROM_DEV:
-		if (!hp->dxferp || hp->dxfer_len == 0)
-			return false;
-		return true;
-	case SG_DXFER_UNKNOWN:
-		if ((!hp->dxferp && hp->dxfer_len) ||
-		    (hp->dxferp && hp->dxfer_len == 0))
-			return false;
-		return true;
-	default:
-		return false;
-	}
-}
-
 static int
 sg_common_write(Sg_fd * sfp, Sg_request * srp,
 		unsigned char *cmnd, int timeout, int blocking)
@@ -800,7 +771,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
 			"sg_common_write:  scsi opcode=0x%02x, cmd_size=%d\n",
 			(int) cmnd[0], (int) hp->cmd_len));
 
-	if (!sg_is_valid_dxfer(hp))
+	if (hp->dxfer_len >= SZ_256M)
 		return -EINVAL;
 
 	k = sg_start_req(srp, cmnd);
-- 
cgit v1.2.3


From 6d0f581d1768d3eaba15776e7dd1fdfec10cfe36 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Fri, 28 Jul 2017 17:42:59 -0700
Subject: xtensa: fix cache aliasing handling code for WT cache

Currently building kernel for xtensa core with aliasing WT cache fails
with the following messages:

  mm/memory.c:2152: undefined reference to `flush_dcache_page'
  mm/memory.c:2332: undefined reference to `local_flush_cache_page'
  mm/memory.c:1919: undefined reference to `local_flush_cache_range'
  mm/memory.c:4179: undefined reference to `copy_to_user_page'
  mm/memory.c:4183: undefined reference to `copy_from_user_page'

This happens because implementation of these functions is only compiled
when data cache is WB, which looks wrong: even when data cache doesn't
need flushing it still needs invalidation. The functions like
__flush_[invalidate_]dcache_* are correctly defined for both WB and WT
caches (and even if they weren't that'd still be ok, just slower).

Fix this by providing the same implementation of the above functions for
both WB and WT cache.

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/mm/cache.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index 1a804a2f9a5b..dbb1cdef3663 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -120,10 +120,6 @@ void copy_user_highpage(struct page *dst, struct page *src,
 	preempt_enable();
 }
 
-#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */
-
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
-
 /*
  * Any time the kernel writes to a user page cache page, or it is about to
  * read from a page cache page this routine is called.
@@ -208,7 +204,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address,
 	__invalidate_icache_page_alias(virt, phys);
 }
 
-#endif
+#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */
 
 void
 update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)
@@ -225,7 +221,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)
 
 	flush_tlb_page(vma, addr);
 
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
+#if (DCACHE_WAY_SIZE > PAGE_SIZE)
 
 	if (!PageReserved(page) && test_bit(PG_arch_1, &page->flags)) {
 		unsigned long phys = page_to_phys(page);
@@ -256,7 +252,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)
  * flush_dcache_page() on the page.
  */
 
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
+#if (DCACHE_WAY_SIZE > PAGE_SIZE)
 
 void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 		unsigned long vaddr, void *dst, const void *src,
-- 
cgit v1.2.3


From 6ab1d8da972d4c4e318607e96c5ecb32101c80f4 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Fri, 28 Jul 2017 21:41:18 +0200
Subject: block, bfq: reset in_service_entity if it becomes idle

BFQ implements hierarchical scheduling by representing each group of
queues with a generic parent entity. For each parent entity, BFQ
maintains an in_service_entity pointer: if one of the child entities
happens to be in service, in_service_entity points to it.  The
resetting of these pointers happens only on queue expirations: when
the in-service queue is expired, i.e., stops to be the queue in
service, BFQ resets all in_service_entity pointers along the
parent-entity path from this queue to the root entity.

Functions handling the scheduling of entities assume, naturally, that
in-service entities are active, i.e., have pending I/O requests (or,
as a special case, even if they have no pending requests, they are
expected to receive a new request very soon, with the scheduler idling
the storage device while waiting for such an event). Unfortunately,
the above resetting scheme of the in_service_entity pointers may cause
this assumption to be violated.  For example, the in-service queue may
happen to remain without requests because of a request merge. In this
case the queue does become idle, and all related data structures are
updated accordingly. But in_service_entity still points to the queue
in the parent entity. This inconsistency may even propagate to
higher-level parent entities, if they happen to become idle as well,
as a consequence of the leaf queue becoming idle. For this queue and
parent entities, scheduling functions have an undefined behaviour,
and, as reported, may easily lead to kernel crashes or hangs.

This commit addresses this issue by simply resetting the
in_service_entity field also when it is detected to point to an entity
becoming idle (regardless of why the entity becomes idle).

Reported-by: Laurentiu Nicola <lnicola@dend.ro>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Tested-by: Laurentiu Nicola <lnicola@dend.ro>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-wf2q.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 979f8f21b7e2..881bbe5e1827 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1158,8 +1158,10 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
 	st = bfq_entity_service_tree(entity);
 	is_in_service = entity == sd->in_service_entity;
 
-	if (is_in_service)
+	if (is_in_service) {
 		bfq_calc_finish(entity, entity->service);
+		sd->in_service_entity = NULL;
+	}
 
 	if (entity->tree == &st->active)
 		bfq_active_extract(st, entity);
-- 
cgit v1.2.3


From 46d556e6aaa0ec4dc83648ab1ca3d01dd2fa3ea3 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sat, 29 Jul 2017 12:42:56 +0200
Subject: block, bfq: consider also in_service_entity to state whether an
 entity is active

Groups of BFQ queues are represented by generic entities in BFQ. When
a queue belonging to a parent entity is deactivated, the parent entity
may need to be deactivated too, in case the deactivated queue was the
only active queue for the parent entity. This deactivation may need to
be propagated upwards if the entity belongs, in its turn, to a further
higher-level entity, and so on. In particular, the upward propagation
of deactivation stops at the first parent entity that remains active
even if one of its child entities has been deactivated.

To decide whether the last non-deactivation condition holds for a
parent entity, BFQ checks whether the field next_in_service is still
not NULL for the parent entity, after the deactivation of one of its
child entity. If it is not NULL, then there are certainly other active
entities in the parent entity, and deactivations can stop.

Unfortunately, this check misses a corner case: if in_service_entity
is not NULL, then next_in_service may happen to be NULL, although the
parent entity is evidently active. This happens if: 1) the entity
pointed by in_service_entity is the only active entity in the parent
entity, and 2) according to the definition of next_in_service, the
in_service_entity cannot be considered as next_in_service. See the
comments on the definition of next_in_service for details on this
second point.

Hitting the above corner case causes crashes.

To address this issue, this commit:
1) Extends the above check on only next_in_service to controlling both
next_in_service and in_service_entity (if any of them is not NULL,
then no further deactivation is performed)
2) Improves the (important) comments on how next_in_service is defined
and updated; in particular it fixes a few rather obscure paragraphs

Reported-by: Eric Wheeler <bfq-sched@lists.ewheeler.net>
Reported-by: Rick Yiu <rick_yiu@htc.com>
Reported-by: Tom X Nguyen <tom81094@gmail.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Tested-by: Eric Wheeler <bfq-sched@lists.ewheeler.net>
Tested-by: Rick Yiu <rick_yiu@htc.com>
Tested-by: Laurentiu Nicola <lnicola@dend.ro>
Tested-by: Tom X Nguyen <tom81094@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.h |  22 ++++++--
 block/bfq-wf2q.c    | 142 +++++++++++++++++++++++++++++-----------------------
 2 files changed, 95 insertions(+), 69 deletions(-)

diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 63e771ab56d8..859f0a8c97c8 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -71,17 +71,29 @@ struct bfq_service_tree {
  *
  * bfq_sched_data is the basic scheduler queue.  It supports three
  * ioprio_classes, and can be used either as a toplevel queue or as an
- * intermediate queue on a hierarchical setup.  @next_in_service
- * points to the active entity of the sched_data service trees that
- * will be scheduled next. It is used to reduce the number of steps
- * needed for each hierarchical-schedule update.
+ * intermediate queue in a hierarchical setup.
  *
  * The supported ioprio_classes are the same as in CFQ, in descending
  * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
  * Requests from higher priority queues are served before all the
  * requests from lower priority queues; among requests of the same
  * queue requests are served according to B-WF2Q+.
- * All the fields are protected by the queue lock of the containing bfqd.
+ *
+ * The schedule is implemented by the service trees, plus the field
+ * @next_in_service, which points to the entity on the active trees
+ * that will be served next, if 1) no changes in the schedule occurs
+ * before the current in-service entity is expired, 2) the in-service
+ * queue becomes idle when it expires, and 3) if the entity pointed by
+ * in_service_entity is not a queue, then the in-service child entity
+ * of the entity pointed by in_service_entity becomes idle on
+ * expiration. This peculiar definition allows for the following
+ * optimization, not yet exploited: while a given entity is still in
+ * service, we already know which is the best candidate for next
+ * service among the other active entitities in the same parent
+ * entity. We can then quickly compare the timestamps of the
+ * in-service entity with those of such best candidate.
+ *
+ * All fields are protected by the lock of the containing bfqd.
  */
 struct bfq_sched_data {
 	/* entity in service */
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 881bbe5e1827..911aa7431dbe 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -188,21 +188,23 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
 
 /*
  * This function tells whether entity stops being a candidate for next
- * service, according to the following logic.
+ * service, according to the restrictive definition of the field
+ * next_in_service. In particular, this function is invoked for an
+ * entity that is about to be set in service.
  *
- * This function is invoked for an entity that is about to be set in
- * service. If such an entity is a queue, then the entity is no longer
- * a candidate for next service (i.e, a candidate entity to serve
- * after the in-service entity is expired). The function then returns
- * true.
+ * If entity is a queue, then the entity is no longer a candidate for
+ * next service according to the that definition, because entity is
+ * about to become the in-service queue. This function then returns
+ * true if entity is a queue.
  *
- * In contrast, the entity could stil be a candidate for next service
- * if it is not a queue, and has more than one child. In fact, even if
- * one of its children is about to be set in service, other children
- * may still be the next to serve. As a consequence, a non-queue
- * entity is not a candidate for next-service only if it has only one
- * child. And only if this condition holds, then the function returns
- * true for a non-queue entity.
+ * In contrast, entity could still be a candidate for next service if
+ * it is not a queue, and has more than one active child. In fact,
+ * even if one of its children is about to be set in service, other
+ * active children may still be the next to serve, for the parent
+ * entity, even according to the above definition. As a consequence, a
+ * non-queue entity is not a candidate for next-service only if it has
+ * only one active child. And only if this condition holds, then this
+ * function returns true for a non-queue entity.
  */
 static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
 {
@@ -213,6 +215,18 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
 
 	bfqg = container_of(entity, struct bfq_group, entity);
 
+	/*
+	 * The field active_entities does not always contain the
+	 * actual number of active children entities: it happens to
+	 * not account for the in-service entity in case the latter is
+	 * removed from its active tree (which may get done after
+	 * invoking the function bfq_no_longer_next_in_service in
+	 * bfq_get_next_queue). Fortunately, here, i.e., while
+	 * bfq_no_longer_next_in_service is not yet completed in
+	 * bfq_get_next_queue, bfq_active_extract has not yet been
+	 * invoked, and thus active_entities still coincides with the
+	 * actual number of active entities.
+	 */
 	if (bfqg->active_entities == 1)
 		return true;
 
@@ -954,7 +968,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
  * one of its children receives a new request.
  *
  * Basically, this function updates the timestamps of entity and
- * inserts entity into its active tree, ater possible extracting it
+ * inserts entity into its active tree, ater possibly extracting it
  * from its idle tree.
  */
 static void __bfq_activate_entity(struct bfq_entity *entity,
@@ -1048,7 +1062,7 @@ static void __bfq_requeue_entity(struct bfq_entity *entity)
 		entity->start = entity->finish;
 		/*
 		 * In addition, if the entity had more than one child
-		 * when set in service, then was not extracted from
+		 * when set in service, then it was