From d4b2bab4f26345ea1803feb23ea92fbe3f6b77bc Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Fri, 2 Feb 2007 16:50:52 +0900
Subject: libata: add deadline support to prereset and reset methods

Add @deadline to prereset and reset methods and make them honor it.
ata_wait_ready() which directly takes @deadline is implemented to be
used as the wait function.  This patch is in preparation for EH timing
improvements.

* ata_wait_ready() never does busy sleep.  It's only used from EH and
  no wait in EH is that urgent.  This function also prints 'be
  patient' message automatically after 5 secs of waiting if more than
  3 secs is remaining till deadline.

* ata_bus_post_reset() now fails with error code if any of its wait
  fails.  This is important because earlier reset tries will have
  shorter timeout than the spec requires.  If a device fails to
  respond before the short timeout, reset should be retried with
  longer timeout rather than silently ignoring the device.

  There are three behavior differences.

  1. Timeout is applied to both devices at once, not separately.  This
     is more consistent with what the spec says.

  2. When a device passes devchk but fails to become ready before
     deadline.  Previouly, post_reset would just succeed and let
     device classification remove the device.  New code fails the
     reset thus causing reset retry.  After a few times, EH will give
     up disabling the port.

  3. When slave device passes devchk but fails to become accessible
     (TF-wise) after reset.  Original code disables dev1 after 30s
     timeout and continues as if the device doesn't exist, while the
     patched code fails reset.  When this happens, new code fails
     reset on whole port rather than proceeding with only the primary
     device.

  If the failing device is suffering transient problems, new code
  retries reset which is a better behavior.  If the failing device is
  actually broken, the net effect is identical to it, but not to the
  other device sharing the channel.  In the previous code, reset would
  have succeeded after 30s thus detecting the working one.  In the new
  code, reset fails and whole port gets disabled.  IMO, it's a
  pathological case anyway (broken device sharing bus with working
  one) and doesn't really matter.

* ata_bus_softreset() is changed to return error code from
  ata_bus_post_reset().  It used to return 0 unconditionally.

* Spin up waiting is to be removed and not converted to honor
  deadline.

* To be on the safe side, deadline is set to 40s for the time being.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index d8cfc72ea9c1..69fc1b8a9215 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -348,8 +348,9 @@ struct ata_queued_cmd;
 
 /* typedefs */
 typedef void (*ata_qc_cb_t) (struct ata_queued_cmd *qc);
-typedef int (*ata_prereset_fn_t)(struct ata_port *ap);
-typedef int (*ata_reset_fn_t)(struct ata_port *ap, unsigned int *classes);
+typedef int (*ata_prereset_fn_t)(struct ata_port *ap, unsigned long deadline);
+typedef int (*ata_reset_fn_t)(struct ata_port *ap, unsigned int *classes,
+			      unsigned long deadline);
 typedef void (*ata_postreset_fn_t)(struct ata_port *ap, unsigned int *classes);
 
 struct ata_ioports {
@@ -688,13 +689,17 @@ extern void __sata_phy_reset(struct ata_port *ap);
 extern void sata_phy_reset(struct ata_port *ap);
 extern void ata_bus_reset(struct ata_port *ap);
 extern int sata_set_spd(struct ata_port *ap);
-extern int sata_phy_debounce(struct ata_port *ap, const unsigned long *param);
-extern int sata_phy_resume(struct ata_port *ap, const unsigned long *param);
-extern int ata_std_prereset(struct ata_port *ap);
-extern int ata_std_softreset(struct ata_port *ap, unsigned int *classes);
-extern int sata_port_hardreset(struct ata_port *ap,
-			       const unsigned long *timing);
-extern int sata_std_hardreset(struct ata_port *ap, unsigned int *class);
+extern int sata_phy_debounce(struct ata_port *ap, const unsigned long *param,
+			     unsigned long deadline);
+extern int sata_phy_resume(struct ata_port *ap, const unsigned long *param,
+			   unsigned long deadline);
+extern int ata_std_prereset(struct ata_port *ap, unsigned long deadline);
+extern int ata_std_softreset(struct ata_port *ap, unsigned int *classes,
+			     unsigned long deadline);
+extern int sata_port_hardreset(struct ata_port *ap, const unsigned long *timing,
+			       unsigned long deadline);
+extern int sata_std_hardreset(struct ata_port *ap, unsigned int *class,
+			      unsigned long deadline);
 extern void ata_std_postreset(struct ata_port *ap, unsigned int *classes);
 extern void ata_port_disable(struct ata_port *);
 extern void ata_std_ports(struct ata_ioports *ioaddr);
@@ -750,6 +755,7 @@ extern void ata_host_resume(struct ata_host *host);
 extern int ata_ratelimit(void);
 extern int ata_busy_sleep(struct ata_port *ap,
 			  unsigned long timeout_pat, unsigned long timeout);
+extern int ata_wait_ready(struct ata_port *ap, unsigned long deadline);
 extern void ata_port_queue_task(struct ata_port *ap, work_func_t fn,
 				void *data, unsigned long delay);
 extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val,
-- 
cgit v1.2.3


From 31daabda16063b64a99a526242add727601e43c3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Fri, 2 Feb 2007 16:50:52 +0900
Subject: libata: reimplement reset sequencing

libata previously depended upon waits in prereset to get resets after
hotplug right for both spin up and device ready wait.  This was
necessary both for reliablity and speed as reset was likely to fail if
initiated too early and each try usually took more than 30secs to
fail.  Previous patches fixed the reliability part by fixing status
and SCR handling in resets.  This patch remedies the speed part by
improving reset sequencing.

Prereset waiting timeout is adjusted to 10s because spinup wait is
replaced by reset sequencing and !BSY wait is not as important as
before.  During boot or module loading where the drive is already
fully spun up, !BSY wait succeeds immediately, so 10s should be enough
in most cases.  It matters after hotplugging or other error
conditions, but in those cases, !BSY wait in prereset simply can't be
relied upon due to the varied and weird behaviors ATA controllers and
devices show.

Reset is now driven by ata_eh_reset_timeouts[] table which contains
timeouts for each reset try.  The first reset can be softreset but the
following ones are always hardreset if available.  Each timeout
defines deadline for the reset try.  If a reset try fails, reset is
retried with the next timeout till the end of the timeout table is
reached.  If a reset try fails before the timeout with error, libata
waits till the deadline of the failed try before retrying.

IOW, the timeout table defines timetable of reset tries such that the
n'th try always begins at least after the sum of all previous timeouts
has passed.  The current timetable defines 4 tries and takes around 1
minute.

@0	: First try.  This should succeed most of the time during boot.
@10	: 10s is enough to spin up most consumer harddrives.  Give it
	  another shot.
@20	: 20s should spin up > 99% of working drives.  This has 30s
	  timeout for retarded devices needing long idleness post reset.
@55	: Final try with 5s timeout just in case.

The above timetable is trade off between not annoying the device too
much with frequent resets and taking reasonable amount of time in most
cases.  Some controllers may do better with shorter timeouts while
others may fare better with longer but we just can't rely upon LLD
writers to test each controller with wide variety of devices using
various scenarios.  We need default behavior which reasonably fits
most cases.

I've tested the above timetable on a dozen SATA controllers and a few
PATA controllers with about a dozen different drives from all major
vendors and 4 different ODDs from three different vendors for both
boot and hotplug (if available) cases.

Boot probing is not affected unless the device is broken in which
cases new code gives up on the port after a minute rather than five or
nine minutes.  When hotplugging, most devices get detected on the
first or second try.  Multi-platter drives with long spin up time
which sometimes took > 40 secs with the original code, now usually
comes up during the second try and at least right after the third try
@20.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 69fc1b8a9215..7906d750aa77 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -296,18 +296,8 @@ enum {
 
 	/* how hard are we gonna try to probe/recover devices */
 	ATA_PROBE_MAX_TRIES	= 3,
-	ATA_EH_RESET_TRIES	= 3,
 	ATA_EH_DEV_TRIES	= 3,
 
-	/* Drive spinup time (time from power-on to the first D2H FIS)
-	 * in msecs - 8s currently.  Failing to get ready in this time
-	 * isn't critical.  It will result in reset failure for
-	 * controllers which can't wait for the first D2H FIS.  libata
-	 * will retry, so it just has to be long enough to spin up
-	 * most devices.
-	 */
-	ATA_SPINUP_WAIT		= 8000,
-
 	/* Horkage types. May be set by libata or controller on drives
 	   (some horkage may be drive/controller pair dependant */
 
@@ -495,7 +485,6 @@ struct ata_eh_info {
 	unsigned int		dev_action[ATA_MAX_DEVICES]; /* dev EH action */
 	unsigned int		flags;		/* ATA_EHI_* flags */
 
-	unsigned long		hotplug_timestamp;
 	unsigned int		probe_mask;
 
 	char			desc[ATA_EH_DESC_LEN];
@@ -925,12 +914,7 @@ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
 
 static inline void __ata_ehi_hotplugged(struct ata_eh_info *ehi)
 {
-	if (ehi->flags & ATA_EHI_HOTPLUGGED)
-		return;
-
 	ehi->flags |= ATA_EHI_HOTPLUGGED | ATA_EHI_RESUME_LINK;
-	ehi->hotplug_timestamp = jiffies;
-
 	ehi->action |= ATA_EH_SOFTRESET;
 	ehi->probe_mask |= (1 << ATA_MAX_DEVICES) - 1;
 }
-- 
cgit v1.2.3