Error handler general clean up

author Mike Anderson <andmike@us.ibm.com>

Mon, 30 Sep 2002 13:43:45 +0000 (09:43 -0400)

committer James Bottomley <jejb@mulgrave.(none)>

Mon, 30 Sep 2002 13:43:45 +0000 (09:43 -0400)
author Mike Anderson <andmike@us.ibm.com>
Mon, 30 Sep 2002 13:43:45 +0000 (09:43 -0400)
committer James Bottomley <jejb@mulgrave.(none)>
Mon, 30 Sep 2002 13:43:45 +0000 (09:43 -0400)
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c

index 2b0ab4e4e0b4d8e095f27164ca3591dd76d1f8ad..48a8727c5ed16cb15008aa9a3c56efd021f86124 100644 (file)
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -259,6 +259,48 @@ struct Scsi_Host * scsi_register(Scsi_Host_Template * tpnt, int j)
      return retval;
  }
  
+void scsi_host_busy_inc(struct Scsi_Host *shost, Scsi_Device *sdev)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(shost->host_lock, flags);
+       shost->host_busy++;     /* host usage counter, updated under host_lock */
+       sdev->device_busy++;    /* device usage counter, same lock */
+       spin_unlock_irqrestore(shost->host_lock, flags);
+}
+
+void scsi_host_busy_dec_and_test(struct Scsi_Host *shost, Scsi_Device *sdev)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(shost->host_lock, flags);
+       shost->host_busy--;     /* Indicate that we are free */
+       sdev->device_busy--;    /* Decrement device usage counter. */
+       if (shost->in_recovery && (shost->host_busy == shost->host_failed)) {
+               up(shost->eh_wait);     /* every busy cmd has failed: wake eh */
+               SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler "
+                                         "thread (%d)\n",
+                                         atomic_read(&shost->eh_wait->count)));
+       }
+       spin_unlock_irqrestore(shost->host_lock, flags);
+}
+
+void scsi_host_failed_inc_and_test(struct Scsi_Host *shost)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(shost->host_lock, flags);
+       shost->in_recovery = 1;
+       shost->host_failed++;
+       if (shost->host_busy == shost->host_failed) {
+               up(shost->eh_wait);     /* every busy cmd has failed: wake eh */
+               SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler "
+                                         "thread (%d)\n",
+                                         atomic_read(&shost->eh_wait->count)));
+       }
+       spin_unlock_irqrestore(shost->host_lock, flags);
+}
+
  /*
   * Overrides for Emacs so that we follow Linus's tabbing style.
   * Emacs will notice this stuff at the end of the file and automatically
diff --git a/drivers/scsi/hosts.h b/drivers/scsi/hosts.h

index 4d7f25dda31182e737180c50f781498569d5deae..e99c729a60f33d76d719f098ca06123bef7581a6 100644 (file)
--- a/drivers/scsi/hosts.h
+++ b/drivers/scsi/hosts.h
@@ -543,6 +543,13 @@ extern int scsi_unregister_device(struct Scsi_Device_Template *);
  extern int scsi_register_host(Scsi_Host_Template *);
  extern int scsi_unregister_host(Scsi_Host_Template *);
  
+/*
+ * host_busy inc/dec/test functions
+ */
+extern void scsi_host_busy_inc(struct Scsi_Host *, Scsi_Device *);
+extern void scsi_host_busy_dec_and_test(struct Scsi_Host *, Scsi_Device *);
+extern void scsi_host_failed_inc_and_test(struct Scsi_Host *);
+
  
  /*
   * This is an ugly hack.  If we expect to be able to load devices at run time,
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c

index 448bb2f167b108f364ceebd6a803fb9e1e21d236..1574e19e313df77e4bf5e014a219e45b6262be82 100644 (file)
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -566,22 +566,6 @@ inline void __scsi_release_command(Scsi_Cmnd * SCpnt)
                                    SCpnt->target,
                                    atomic_read(&SCpnt->host->host_active),
                                    SCpnt->host->host_failed));
-       if (SCpnt->host->host_failed != 0) {
-               SCSI_LOG_ERROR_RECOVERY(5, printk("Error handler thread %d %d\n",
-                                               SCpnt->host->in_recovery,
-                                               SCpnt->host->eh_active));
-       }
-       /*
-        * If the host is having troubles, then look to see if this was the last
-        * command that might have failed.  If so, wake up the error handler.
-        */
-       if (SCpnt->host->in_recovery
-           && !SCpnt->host->eh_active
-           && SCpnt->host->host_busy == SCpnt->host->host_failed) {
-               SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n",
-                            atomic_read(&SCpnt->host->eh_wait->count)));
-               up(SCpnt->host->eh_wait);
-       }
  
         spin_unlock_irqrestore(&device_request_lock, flags);
  
@@ -1217,28 +1201,11 @@ void scsi_done(Scsi_Cmnd * SCpnt)
          * etc, etc.
          */
         if (!tstatus) {
-               SCpnt->done_late = 1;
                 return;
         }
+
         /* Set the serial numbers back to zero */
         SCpnt->serial_number = 0;
-
-       /*
-        * First, see whether this command already timed out.  If so, we ignore
-        * the response.  We treat it as if the command never finished.
-        *
-        * Since serial_number is now 0, the error handler cound detect this
-        * situation and avoid to call the low level driver abort routine.
-        * (DB)
-         *
-         * FIXME(eric) - I believe that this test is now redundant, due to
-         * the test of the return status of del_timer().
-        */
-       if (SCpnt->state == SCSI_STATE_TIMEOUT) {
-               SCSI_LOG_MLCOMPLETE(1, printk("Ignoring completion of %p due to timeout status", SCpnt));
-               return;
-       }
-
         SCpnt->serial_number_at_timeout = 0;
         SCpnt->state = SCSI_STATE_BHQUEUE;
         SCpnt->owner = SCSI_OWNER_BH_HANDLER;
@@ -1349,21 +1316,11 @@ static void scsi_softirq(struct softirq_action *h)
                                         SCSI_LOG_MLCOMPLETE(3, print_sense("bh", SCpnt));
                                 }
                                 if (SCpnt->host->eh_wait != NULL) {
-                                       SCpnt->host->host_failed++;
+                                       scsi_eh_eflags_set(SCpnt, SCSI_EH_CMD_FAILED | SCSI_EH_CMD_ERR);
                                         SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
                                         SCpnt->state = SCSI_STATE_FAILED;
-                                       SCpnt->host->in_recovery = 1;
-                                       /*
-                                        * If the host is having troubles, then
-                                        * look to see if this was the last
-                                        * command that might have failed.  If
-                                        * so, wake up the error handler.
-                                        */
-                                       if (SCpnt->host->host_busy == SCpnt->host->host_failed) {
-                                               SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n",
-                                                                                 atomic_read(&SCpnt->host->eh_wait->count)));
-                                               up(SCpnt->host->eh_wait);
-                                       }
+
+                                       scsi_host_failed_inc_and_test(SCpnt->host);
                                 } else {
                                         /*
                                          * We only get here if the error
@@ -1418,7 +1375,6 @@ void scsi_finish_command(Scsi_Cmnd * SCpnt)
         struct Scsi_Host *host;
         Scsi_Device *device;
         Scsi_Request * SRpnt;
-       unsigned long flags;
  
         host = SCpnt->host;
         device = SCpnt->device;
@@ -1432,10 +1388,7 @@ void scsi_finish_command(Scsi_Cmnd * SCpnt)
           * one execution context, but the device and host structures are
           * shared.
           */
-       spin_lock_irqsave(host->host_lock, flags);
-       host->host_busy--;      /* Indicate that we are free */
-       device->device_busy--;  /* Decrement device usage counter. */
-       spin_unlock_irqrestore(host->host_lock, flags);
+       scsi_host_busy_dec_and_test(host, device);
  
          /*
           * Clear the flags which say that the device/host is no longer
@@ -1450,7 +1403,7 @@ void scsi_finish_command(Scsi_Cmnd * SCpnt)
          * If we have valid sense information, then some kind of recovery
          * must have taken place.  Make a note of this.
          */
-       if (scsi_sense_valid(SCpnt)) {
+       if (SCSI_SENSE_VALID(SCpnt)) {
                 SCpnt->result |= (DRIVER_SENSE << 24);
         }
         SCSI_LOG_MLCOMPLETE(3, printk("Notifying upper driver of completion for device %d %x\n",
diff --git a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h

index d16f4933b178c3ae511a7d63d4638a05d771c9ea..49ab5107e48f7e014cd3ace35d1101d4d56209b8 100644 (file)
--- a/drivers/scsi/scsi.h
+++ b/drivers/scsi/scsi.h
@@ -428,7 +428,6 @@ extern void scsi_add_timer(Scsi_Cmnd * SCset, int timeout,
                            void (*complete) (Scsi_Cmnd *));
  extern int scsi_delete_timer(Scsi_Cmnd * SCset);
  extern void scsi_error_handler(void *host);
-extern int scsi_sense_valid(Scsi_Cmnd *);
  extern int scsi_decide_disposition(Scsi_Cmnd * SCpnt);
  extern int scsi_block_when_processing_errors(Scsi_Device *);
  extern void scsi_sleep(int);
@@ -701,6 +700,7 @@ struct scsi_cmnd {
         struct scsi_cmnd *reset_chain;
  
         int eh_state;           /* Used for state tracking in error handlr */
+       int eh_eflags;          /* Used by error handlr */
         void (*done) (struct scsi_cmnd *);      /* Mid-level done function */
         /*
            A SCSI Command is assigned a nonzero serial_number when internal_cmnd
@@ -940,4 +940,26 @@ static inline Scsi_Cmnd *scsi_find_tag(Scsi_Device *SDpnt, int tag) {
          return (Scsi_Cmnd *)req->special;
  }
  
+#define scsi_eh_eflags_chk(scp, flags) ((scp)->eh_eflags & (flags))
+
+#define scsi_eh_eflags_set(scp, flags) do { \
+       (scp)->eh_eflags |= (flags); \
+       } while(0)
+
+#define scsi_eh_eflags_clr(scp, flags) do { \
+       (scp)->eh_eflags &= ~(flags); \
+       } while(0)
+
+#define scsi_eh_eflags_clr_all(scp) ((scp)->eh_eflags = 0)
+
+/*
+ * Scsi Error Handler Flags
+ */
+#define SCSI_EH_CMD_ERR        0x0001  /* Orig cmd error'd */
+#define SCSI_EH_CMD_FAILED     0x0002  /* Orig cmd error type failed */
+#define SCSI_EH_CMD_TIMEOUT    0x0004  /* Orig cmd error type timeout */
+#define SCSI_EH_REC_TIMEOUT    0x0008  /* Recovery cmd timeout */
+
+#define SCSI_SENSE_VALID(scmd) (((scmd)->sense_buffer[0] & 0x70) == 0x70)
+
  #endif
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c

index 11d9307d20f02e96ab8ff009f3afb4f68d0963a7..a5e8672b216f095f9092d9f3a03a361c576229f6 100644 (file)
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -6,6 +6,8 @@
   *                        Leonard Zubkoff and David Miller at Linux Expo, 
   *                        ideas originating from all over the place.
   *
+ *     Restructured scsi_unjam_host and associated functions.
+ *     September 04, 2002 Mike Anderson (andmike@us.ibm.com)
   */
  
  #include <linux/module.h>
@@ -49,16 +51,10 @@
  
  #ifdef DEBUG
  #define SENSE_TIMEOUT SCSI_TIMEOUT
-#define ABORT_TIMEOUT SCSI_TIMEOUT
-#define RESET_TIMEOUT SCSI_TIMEOUT
  #else
  #define SENSE_TIMEOUT (10*HZ)
-#define RESET_TIMEOUT (2*HZ)
-#define ABORT_TIMEOUT (15*HZ)
  #endif
  
-#define STATIC
-
  /*
   * These should *probably* be handled by the host itself.
   * Since it is allowed to sleep, it probably should.
@@ -66,47 +62,21 @@
  #define BUS_RESET_SETTLE_TIME   5*HZ
  #define HOST_RESET_SETTLE_TIME  10*HZ
  
-
-static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_error.c,v 1.10 1997/12/08 04:50:35 eric Exp $";
-
-STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt);
-STATIC int scsi_request_sense(Scsi_Cmnd *);
-STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout);
-STATIC int scsi_try_to_abort_command(Scsi_Cmnd *, int);
-STATIC int scsi_test_unit_ready(Scsi_Cmnd *);
-STATIC int scsi_try_bus_device_reset(Scsi_Cmnd *, int timeout);
-STATIC int scsi_try_bus_reset(Scsi_Cmnd *);
-STATIC int scsi_try_host_reset(Scsi_Cmnd *);
-STATIC int scsi_unit_is_ready(Scsi_Cmnd *);
-STATIC void scsi_eh_action_done(Scsi_Cmnd *, int);
-STATIC int scsi_eh_retry_command(Scsi_Cmnd *);
-STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt);
-STATIC void scsi_restart_operations(struct Scsi_Host *);
-STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt);
-
-
-/*
- * Function:    scsi_add_timer()
- *
- * Purpose:     Start timeout timer for a single scsi command.
- *
- * Arguments:   SCset   - command that is about to start running.
- *              timeout - amount of time to allow this command to run.
- *              complete - timeout function to call if timer isn't
- *                      canceled.
+/**
+ * scsi_add_timer - Start timeout timer for a single scsi command.
+ * @scmd:      scsi command that is about to start running.
+ * @timeout:   amount of time to allow this command to run.
+ * @complete:  timeout function to call if timer isn't canceled.
   *
- * Returns:     Nothing
- *
- * Notes:       This should be turned into an inline function.
- *
- * More Notes:  Each scsi command has it's own timer, and as it is added to
- *              the queue, we set up the timer.  When the command completes,
- *              we cancel the timer.  Pretty simple, really, especially
- *              compared to the old way of handling this crap.
- */
-void scsi_add_timer(Scsi_Cmnd * SCset,
-                   int timeout,
-                   void (*complete) (Scsi_Cmnd *))
+ * Notes:
+ *    This should be turned into an inline function.  Each scsi command
+ *    has its own timer, and as it is added to the queue, we set up the
+ *    timer.  When the command completes, we cancel the timer.  Pretty
+ *    simple, really, especially compared to the old way of handling this
+ *    crap.
+ **/
+void scsi_add_timer(Scsi_Cmnd *scmd, int timeout, void (*complete)
+                   (Scsi_Cmnd *))
  {
  
         /*
@@ -114,320 +84,541 @@ void scsi_add_timer(Scsi_Cmnd * SCset,
          * first delete the timer.  The timer handling code gets rather
          * confused if we don't do this.
          */
-       if (SCset->eh_timeout.function != NULL) {
-               del_timer(&SCset->eh_timeout);
+       if (scmd->eh_timeout.function != NULL) {
+               del_timer(&scmd->eh_timeout);
         }
-       SCset->eh_timeout.data = (unsigned long) SCset;
-       SCset->eh_timeout.expires = jiffies + timeout;
-       SCset->eh_timeout.function = (void (*)(unsigned long)) complete;
+       scmd->eh_timeout.data = (unsigned long) scmd;
+       scmd->eh_timeout.expires = jiffies + timeout;
+       scmd->eh_timeout.function = (void (*)(unsigned long)) complete;
  
-       SCset->done_late = 0;
+       SCSI_LOG_ERROR_RECOVERY(5, printk("Adding timer for command %p at "
+                                         "%d (%p)\n", scmd, timeout,
+                                         complete));
  
-       SCSI_LOG_ERROR_RECOVERY(5, printk("Adding timer for command %p at %d (%p)\n", SCset, timeout, complete));
-
-       add_timer(&SCset->eh_timeout);
+       add_timer(&scmd->eh_timeout);
  
  }
  
-/*
- * Function:    scsi_delete_timer()
- *
- * Purpose:     Delete/cancel timer for a given function.
- *
- * Arguments:   SCset   - command that we are canceling timer for.
+/**
+ * scsi_delete_timer - Delete/cancel timer for a given function.
+ * @scmd:      Cmd that we are canceling timer for
   *
- * Returns:     1 if we were able to detach the timer.  0 if we
- *              blew it, and the timer function has already started
- *              to run.
+ * Notes:
+ *     This should be turned into an inline function.
   *
- * Notes:       This should be turned into an inline function.
- */
-int scsi_delete_timer(Scsi_Cmnd * SCset)
+ * Return value:
+ *     1 if we were able to detach the timer.  0 if we blew it, and the
+ *     timer function has already started to run.
+ **/
+int scsi_delete_timer(Scsi_Cmnd *scmd)
  {
         int rtn;
  
-       rtn = del_timer(&SCset->eh_timeout);
+       rtn = del_timer(&scmd->eh_timeout);
  
-       SCSI_LOG_ERROR_RECOVERY(5, printk("Clearing timer for command %p %d\n", SCset, rtn));
+       SCSI_LOG_ERROR_RECOVERY(5, printk("Clearing timer for command %p"
+                                        " %d\n", scmd, rtn));
  
-       SCset->eh_timeout.data = (unsigned long) NULL;
-       SCset->eh_timeout.function = NULL;
+       scmd->eh_timeout.data = (unsigned long) NULL;
+       scmd->eh_timeout.function = NULL;
  
         return rtn;
  }
  
-/*
- * Function:    scsi_times_out()
+/**
+ * scsi_times_out - Timeout function for normal scsi commands.
+ * @scmd:      Cmd that is timing out.
   *
- * Purpose:     Timeout function for normal scsi commands..
+ * Notes:
+ *     We do not need to lock this.  There is the potential for a race
+ *     only in that the normal completion handling might run, but if the
+ *     normal completion function determines that the timer has already
+ *     fired, then it mustn't do anything.
+ **/
+void scsi_times_out(Scsi_Cmnd *scmd)
+{
+       /* Set the serial_number_at_timeout to the current serial_number */
+       scmd->serial_number_at_timeout = scmd->serial_number;
+
+       scsi_eh_eflags_set(scmd, SCSI_EH_CMD_TIMEOUT | SCSI_EH_CMD_ERR);
+
+       if( scmd->host->eh_wait == NULL ) {
+               panic("Error handler thread not present at %p %p %s %d",
+                     scmd, scmd->host, __FILE__, __LINE__);
+       }
+
+       scsi_host_failed_inc_and_test(scmd->host);
+
+       SCSI_LOG_TIMEOUT(3, printk("Command timed out active=%d busy=%d "
+                                  "failed=%d\n",
+                                  atomic_read(&scmd->host->host_active),
+                                  scmd->host->host_busy,
+                                  scmd->host->host_failed));
+}
+
+/**
+ * scsi_block_when_processing_errors - Prevent cmds from being queued.
+ * @sdev:      Device on which we are performing recovery.
   *
- * Arguments:   SCpnt   - command that is timing out.
+ * Description:
+ *     We block until the host is out of error recovery, and then check to
+ *     see whether the host or the device is offline.
   *
- * Returns:     Nothing.
+ * Return value:
+ *     FALSE when dev was taken offline by error recovery. TRUE OK to
+ *     proceed.
+ **/
+int scsi_block_when_processing_errors(Scsi_Device *sdev)
+{
+
+       SCSI_SLEEP(&sdev->host->host_wait, sdev->host->in_recovery);
+
+       SCSI_LOG_ERROR_RECOVERY(5, printk("Open returning %d\n",
+                                         sdev->online));
+
+       return sdev->online;
+}
+
+#if CONFIG_SCSI_LOGGING
+/**
+ * scsi_eh_prt_fail_stats - Log info on failures.
+ * @sc_list:   List for failed cmds.
+ * @shost:     scsi host being recovered.
+ **/
+static void scsi_eh_prt_fail_stats(Scsi_Cmnd *sc_list, struct Scsi_Host *shost)
+{
+       Scsi_Cmnd *scmd;
+       Scsi_Device *sdev;
+       int total_failures = 0;
+       int cmd_failed = 0;
+       int cmd_timed_out = 0;
+       int devices_failed = 0;
+
+
+       for (sdev = shost->host_queue; sdev; sdev = sdev->next) {
+               for (scmd = sc_list; scmd; scmd = scmd->bh_next) {
+                       if (scmd->device == sdev) {
+                               ++total_failures;
+                               if (scsi_eh_eflags_chk(scmd,
+                                                      SCSI_EH_CMD_TIMEOUT))
+                                       ++cmd_timed_out;
+                               else
+                                       ++cmd_failed;
+                       }
+               }
+
+               if (cmd_timed_out || cmd_failed) {
+                       SCSI_LOG_ERROR_RECOVERY(3,
+                               printk("scsi_eh: %d:%d:%d:%d cmds failed: %d, "
+                                      "timedout: %d\n",
+                                      shost->host_no, sdev->channel,
+                                      sdev->id, sdev->lun,
+                                      cmd_failed, cmd_timed_out));
+                       cmd_timed_out = 0;      /* per-device tallies restart */
+                       cmd_failed = 0;
+                       ++devices_failed;
+               }
+       }
+
+       SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d "
+                                         "devices require eh work\n",
+                                 total_failures, devices_failed));
+}
+#endif
+
+/**
+ * scsi_eh_get_failed - Gather failed cmds.
+ * @sc_list:   A pointer to a list for failed cmds.
+ * @shost:     Scsi host being recovered.
   *
- * Notes:       We do not need to lock this.  There is the potential for
- *              a race only in that the normal completion handling might
- *              run, but if the normal completion function determines
- *              that the timer has already fired, then it mustn't do
- *              anything.
- */
-void scsi_times_out(Scsi_Cmnd * SCpnt)
+ * XXX Add opaque iterator for device / shost. Investigate direct
+ * addition to per eh list on error allowing skipping of this step.
+ **/
+static void scsi_eh_get_failed(Scsi_Cmnd **sc_list, struct Scsi_Host *shost)
  {
-       /* 
-        * Notify the low-level code that this operation failed and we are
-        * reposessing the command.  
-        */
-#ifdef ERIC_neverdef
-       /*
-        * FIXME(eric)
-        * Allow the host adapter to push a queue ordering tag
-        * out to the bus to force the command in question to complete.
-        * If the host wants to do this, then we just restart the timer
-        * for the command.  Before we really do this, some real thought
-        * as to the optimum way to handle this should be done.  We *do*
-        * need to force ordering every so often to ensure that all requests
-        * do eventually complete, but I am not sure if this is the best way
-        * to actually go about it.
-        *
-        * Better yet, force a sync here, but don't block since we are in an
-        * interrupt.
-        */
-       if (SCpnt->host->hostt->eh_ordered_queue_tag) {
-               if ((*SCpnt->host->hostt->eh_ordered_queue_tag) (SCpnt)) {
-                       scsi_add_timer(SCpnt, SCpnt->internal_timeout,
-                                      scsi_times_out);
-                       return;
+       int found;
+       Scsi_Device *sdev;
+       Scsi_Cmnd *scmd;
+
+       for (found = 0, sdev = shost->host_queue; sdev; sdev = sdev->next) {
+               for (scmd = sdev->device_queue; scmd; scmd = scmd->next) {
+                       if (scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)) {
+                               scmd->bh_next = *sc_list;
+                               *sc_list = scmd;
+                               found++;
+                       } else {
+                               /*
+                                * FIXME Verify how this can happen and if
+                                * this is still needed??
+                                */
+                           if (scmd->state != SCSI_STATE_INITIALIZING
+                           && scmd->state != SCSI_STATE_UNUSED) {
+                               /*
+                                * Rats.  Something is still floating
+                                * around out there.  This could be the
+                                * result of the fact that the upper level
+                                * drivers are still frobbing commands
+                                * that might have succeeded.  There are
+                                * two outcomes. One is that the command
+                                * block will eventually be freed, and the
+                                * other one is that the command will be
+                                * queued and will be finished along the
+                                * way.
+                                */
+                               SCSI_LOG_ERROR_RECOVERY(1, printk("Error hdlr "
+                                                         "prematurely woken "
+                                                         "cmds still active "
+                                                         "(%p %x %d)\n",
+                                              scmd, scmd->state,
+                                              scmd->target));
+                               }
+                       }
                 }
         }
-       /*
-        * FIXME(eric) - add a second special interface to handle this
-        * case.  Ideally that interface can also be used to request
-        * a queu
-        */
-       if (SCpnt->host->can_queue) {
-               SCpnt->host->hostt->queuecommand(SCpnt, NULL);
+
+       SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(*sc_list, shost));
+
+       BUG_ON(shost->host_failed != found);
+}
+
+/**
+ * scsi_check_sense - Examine scsi cmd sense
+ * @scmd:      Cmd to have sense checked.
+ **/
+static int scsi_check_sense(Scsi_Cmnd *scmd)
+{
+       if (!SCSI_SENSE_VALID(scmd)) {
+               return FAILED;
         }
-#endif
+       if (scmd->sense_buffer[2] & 0xe0)
+               return SUCCESS;
  
-       /* Set the serial_number_at_timeout to the current serial_number */
-       SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+       switch (scmd->sense_buffer[2] & 0xf) {
+       case NO_SENSE:
+               return SUCCESS;
+       case RECOVERED_ERROR:
+               return /* soft_error */ SUCCESS;
  
-       SCpnt->eh_state = FAILED;
-       SCpnt->state = SCSI_STATE_TIMEOUT;
-       SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+       case ABORTED_COMMAND:
+               return NEEDS_RETRY;
+       case NOT_READY:
+       case UNIT_ATTENTION:
+               /*
+                * if we are expecting a cc/ua because of a bus reset that we
+                * performed, treat this just as a retry.  otherwise this is
+                * information that we should pass up to the upper-level driver
+                * so that we can deal with it there.
+                */
+               if (scmd->device->expecting_cc_ua) {
+                       scmd->device->expecting_cc_ua = 0;
+                       return NEEDS_RETRY;
+               }
+               /*
+                * if the device is in the process of becoming ready, we 
+                * should retry.
+                */
+               if ((scmd->sense_buffer[12] == 0x04) &&
+                       (scmd->sense_buffer[13] == 0x01)) {
+                       return NEEDS_RETRY;
+               }
+               return SUCCESS;
  
-       SCpnt->host->in_recovery = 1;
-       SCpnt->host->host_failed++;
+               /* these three are not supported */
+       case COPY_ABORTED:
+       case VOLUME_OVERFLOW:
+       case MISCOMPARE:
+               return SUCCESS;
  
-       SCSI_LOG_TIMEOUT(3, printk("Command timed out active=%d busy=%d failed=%d\n",
-                                  atomic_read(&SCpnt->host->host_active),
-                                  SCpnt->host->host_busy,
-                                  SCpnt->host->host_failed));
+       case MEDIUM_ERROR:
+               return NEEDS_RETRY;
  
-       /*
-        * If the host is having troubles, then look to see if this was the last
-        * command that might have failed.  If so, wake up the error handler.
-        */
-       if( SCpnt->host->eh_wait == NULL ) {
-               panic("Error handler thread not present at %p %p %s %d", 
-                     SCpnt, SCpnt->host, __FILE__, __LINE__);
-       }
-       if (SCpnt->host->host_busy == SCpnt->host->host_failed) {
-               up(SCpnt->host->eh_wait);
+       case ILLEGAL_REQUEST:
+       case BLANK_CHECK:
+       case DATA_PROTECT:
+       case HARDWARE_ERROR:
+       default:
+               return SUCCESS;
         }
  }
  
-/*
- * Function     scsi_block_when_processing_errors
+/**
+ * scsi_eh_completed_normally - Disposition an eh cmd on return from LLD.
+ * @scmd:      SCSI cmd to examine.
   *
- * Purpose:     Prevent more commands from being queued while error recovery
- *              is taking place.
- *
- * Arguments:   SDpnt - device on which we are performing recovery.
- *
- * Returns:     FALSE   The device was taken offline by error recovery.
- *              TRUE    OK to proceed.
- *
- * Notes:       We block until the host is out of error recovery, and then
- *              check to see whether the host or the device is offline.
- */
-int scsi_block_when_processing_errors(Scsi_Device * SDpnt)
+ * Notes:
+ *    This is *only* called when we are examining the status of commands
+ *    queued during error recovery.  the main difference here is that we
+ *    don't allow for the possibility of retries here, and we are a lot
+ *    more restrictive about what we consider acceptable.
+ **/
+static int scsi_eh_completed_normally(Scsi_Cmnd *scmd)
  {
+       int rtn;
  
-       SCSI_SLEEP(&SDpnt->host->host_wait, SDpnt->host->in_recovery);
-
-       SCSI_LOG_ERROR_RECOVERY(5, printk("Open returning %d\n", SDpnt->online));
+       /*
+        * first check the host byte, to see if there is anything in there
+        * that would indicate what we need to do.
+        */
+       if (host_byte(scmd->result) == DID_RESET) {
+               if (scmd->flags & IS_RESETTING) {
+                       /*
+                        * ok, this is normal.  we don't know whether in fact
+                        * the command in question really needs to be rerun
+                        * or not - if this was the original data command then
+                        * the answer is yes, otherwise we just flag it as
+                        * SUCCESS.
+                        */
+                       scmd->flags &= ~IS_RESETTING;
+                       goto maybe_retry;
+               }
+               /*
+                * rats.  we are already in the error handler, so we now
+                * get to try and figure out what to do next.  if the sense
+                * is valid, we have a pretty good idea of what to do.
+                * if not, we mark it as FAILED.
+                */
+               rtn = scsi_check_sense(scmd);
+               if (rtn == NEEDS_RETRY)
+                       goto maybe_retry;
+               return rtn;
+       }
+       if (host_byte(scmd->result) != DID_OK) {
+               return FAILED;
+       }
+       /*
+        * next, check the message byte.
+        */
+       if (msg_byte(scmd->result) != COMMAND_COMPLETE) {
+               return FAILED;
+       }
+       /*
+        * now, check the status byte to see if this indicates
+        * anything special.
+        */
+       switch (status_byte(scmd->result)) {
+       case GOOD:
+       case COMMAND_TERMINATED:
+               return SUCCESS;
+       case CHECK_CONDITION:
+               rtn = scsi_check_sense(scmd);
+               if (rtn == NEEDS_RETRY)
+                       goto maybe_retry;
+               return rtn;
+       case CONDITION_GOOD:
+       case INTERMEDIATE_GOOD:
+       case INTERMEDIATE_C_GOOD:
+               /*
+                * who knows?  FIXME(eric)
+                */
+               return SUCCESS;
+       case BUSY:
+       case QUEUE_FULL:
+       case RESERVATION_CONFLICT:
+       default:
+               return FAILED;
+       }
+       return FAILED;
  
-       return SDpnt->online;
+ maybe_retry:
+       if ((++scmd->retries) < scmd->allowed) {
+               return NEEDS_RETRY;
+       } else {
+               /* no more retries - report this one back to upper level */
+               return SUCCESS;
+       }
  }
  
-/*
- * Function:    scsi_eh_times_out()
- *
- * Purpose:     Timeout function for error handling.
+/**
+ * scsi_eh_times_out - timeout function for error handling.
+ * @scmd:      Cmd that is timing out.
   *
- * Arguments:   SCpnt   - command that is timing out.
- *
- * Returns:     Nothing.
- *
- * Notes:       During error handling, the kernel thread will be sleeping
- *              waiting for some action to complete on the device.  Our only
- *              job is to record that it timed out, and to wake up the
- *              thread.
- */
-STATIC
-void scsi_eh_times_out(Scsi_Cmnd * SCpnt)
+ * Notes:
+ *    During error handling, the kernel thread will be sleeping waiting
+ *    for some action to complete on the device.  our only job is to
+ *    record that it timed out, and to wake up the thread.
+ **/
+static void scsi_eh_times_out(Scsi_Cmnd *scmd)
  {
-       SCpnt->eh_state = SCSI_STATE_TIMEOUT;
-       SCSI_LOG_ERROR_RECOVERY(5, printk("In scsi_eh_times_out %p\n", SCpnt));
+       scsi_eh_eflags_set(scmd, SCSI_EH_REC_TIMEOUT);
+       SCSI_LOG_ERROR_RECOVERY(3, printk("in scsi_eh_times_out %p\n", scmd));
  
-       if (SCpnt->host->eh_action != NULL)
-               up(SCpnt->host->eh_action);
+       if (scmd->host->eh_action != NULL)
+               up(scmd->host->eh_action);
         else
-               printk("Missing scsi error handler thread\n");
+               printk("missing scsi error handler thread\n");
  }
  
-
-/*
- * Function:    scsi_eh_done()
- *
- * Purpose:     Completion function for error handling.
- *
- * Arguments:   SCpnt   - command that is timing out.
- *
- * Returns:     Nothing.
- *
- * Notes:       During error handling, the kernel thread will be sleeping
- *              waiting for some action to complete on the device.  Our only
- *              job is to record that the action completed, and to wake up the
- *              thread.
- */
-STATIC
-void scsi_eh_done(Scsi_Cmnd * SCpnt)
+/**
+ * scsi_eh_done - Completion function for error handling.
+ * @scmd:      Cmd that is done.
+ **/
+static void scsi_eh_done(Scsi_Cmnd *scmd)
  {
         int     rtn;
  
         /*
-        * If the timeout handler is already running, then just set the
-        * flag which says we finished late, and return.  We have no
+        * if the timeout handler is already running, then just set the
+        * flag which says we finished late, and return.  we have no
          * way of stopping the timeout handler from running, so we must
          * always defer to it.
          */
-       rtn = del_timer(&SCpnt->eh_timeout);
+       rtn = del_timer(&scmd->eh_timeout);
         if (!rtn) {
-               SCpnt->done_late = 1;
                 return;
         }
  
-       SCpnt->request->rq_status = RQ_SCSI_DONE;
+       scmd->request->rq_status = RQ_SCSI_DONE;
  
-       SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
-       SCpnt->eh_state = SUCCESS;
+       scmd->owner = SCSI_OWNER_ERROR_HANDLER;
  
-       SCSI_LOG_ERROR_RECOVERY(5, printk("In eh_done %p result:%x\n", SCpnt,
-                                         SCpnt->result));
+       SCSI_LOG_ERROR_RECOVERY(3, printk("in eh_done %p result:%x\n", scmd,
+                                         scmd->result));
  
-       if (SCpnt->host->eh_action != NULL)
-               up(SCpnt->host->eh_action);
+       if (scmd->host->eh_action != NULL)
+               up(scmd->host->eh_action);
  }
  
-/*
- * Function:    scsi_eh_action_done()
+/**
+ * scsi_send_eh_cmnd  - send a cmd to a device as part of error recovery.
+ * @scmd:      SCSI Cmd to send.
+ * @timeout:   Timeout for cmd.
   *
- * Purpose:     Completion function for error handling.
- *
- * Arguments:   SCpnt   - command that is timing out.
- *              answer  - boolean that indicates whether operation succeeded.
- *
- * Returns:     Nothing.
- *
- * Notes:       This callback is only used for abort and reset operations.
- */
-STATIC
-void scsi_eh_action_done(Scsi_Cmnd * SCpnt, int answer)
+ * Notes:
+ *    The initialization of the structures is quite a bit different in
+ *    this case, and furthermore, there is a different completion handler
+ *    vs scsi_dispatch_cmd.
+ * Return value:
+ *    SUCCESS/FAILED
+ **/
+static int scsi_send_eh_cmnd(Scsi_Cmnd *scmd, int timeout)
  {
-       SCpnt->request->rq_status = RQ_SCSI_DONE;
+       unsigned long flags;
+       struct Scsi_Host *host = scmd->host;
+       int rtn = SUCCESS;
  
-       SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
-       SCpnt->eh_state = (answer ? SUCCESS : FAILED);
+       ASSERT_LOCK(host->host_lock, 0);
  
-       if (SCpnt->host->eh_action != NULL)
-               up(SCpnt->host->eh_action);
-}
+retry:
+       /*
+        * we will use a queued command if possible, otherwise we will
+        * emulate the queuing and calling of completion function ourselves.
+        */
+       scmd->owner = SCSI_OWNER_LOWLEVEL;
  
-/*
- * Function:  scsi_sense_valid()
- *
- * Purpose:     Determine whether a host has automatically obtained sense
- *              information or not.  If we have it, then give a recommendation
- *              as to what we should do next.
- */
-int scsi_sense_valid(Scsi_Cmnd * SCpnt)
-{
-       if (((SCpnt->sense_buffer[0] & 0x70) >> 4) != 7) {
-               return FALSE;
-       }
-       return TRUE;
-}
+       if (host->can_queue) {
+               DECLARE_MUTEX_LOCKED(sem);
  
-/*
- * Function:  scsi_eh_retry_command()
- *
- * Purpose:     Retry the original command
- *
- * Returns:     SUCCESS - we were able to get the sense data.
- *              FAILED  - we were not able to get the sense data.
- * 
- * Notes:       This function will *NOT* return until the command either
- *              times out, or it completes.
- */
-STATIC int scsi_eh_retry_command(Scsi_Cmnd * SCpnt)
-{
-       memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
-              sizeof(SCpnt->data_cmnd));
-       SCpnt->request_buffer = SCpnt->buffer;
-       SCpnt->request_bufflen = SCpnt->bufflen;
-       SCpnt->use_sg = SCpnt->old_use_sg;
-       SCpnt->cmd_len = SCpnt->old_cmd_len;
-       SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
-       SCpnt->underflow = SCpnt->old_underflow;
+               scsi_add_timer(scmd, timeout, scsi_eh_times_out);
+
+               /*
+                * set up the semaphore so we wait for the command to complete.
+                */
+               scmd->host->eh_action = &sem;
+               scmd->request->rq_status = RQ_SCSI_BUSY;
+
+               spin_lock_irqsave(scmd->host->host_lock, flags);
+               host->hostt->queuecommand(scmd, scsi_eh_done);
+               spin_unlock_irqrestore(scmd->host->host_lock, flags);
+
+               down(&sem);
+
+               scmd->host->eh_action = NULL;
+
+               /*
+                * see if timeout.  if so, tell the host to forget about it.
+                * in other words, we don't want a callback any more.
+                */
+               if (scsi_eh_eflags_chk(scmd, SCSI_EH_REC_TIMEOUT)) {
+                       scsi_eh_eflags_clr(scmd,  SCSI_EH_REC_TIMEOUT);
+                        scmd->owner = SCSI_OWNER_LOWLEVEL;
+
+                       /*
+                        * as far as the low level driver is
+                        * concerned, this command is still active, so
+                        * we must give the low level driver a chance
+                        * to abort it. (db) 
+                        *
+                        * FIXME(eric) - we are not tracking whether we could
+                        * abort a timed out command or not.  not sure how
+                        * we should treat them differently anyways.
+                        */
+                       spin_lock_irqsave(scmd->host->host_lock, flags);
+                       if (scmd->host->hostt->eh_abort_handler)
+                               scmd->host->hostt->eh_abort_handler(scmd);
+                       spin_unlock_irqrestore(scmd->host->host_lock, flags);
+                       
+                       scmd->request->rq_status = RQ_SCSI_DONE;
+                       scmd->owner = SCSI_OWNER_ERROR_HANDLER;
+                       
+                       rtn = FAILED;
+               }
+               SCSI_LOG_ERROR_RECOVERY(3, printk("%s: %p rtn:%x\n",
+                                                 __FUNCTION__, scmd,
+                                                 rtn));
+       } else {
+               int temp;
  
-       scsi_send_eh_cmnd(SCpnt, SCpnt->timeout_per_command);
+               /*
+                * we damn well had better never use this code.  there is no
+                * timeout protection here, since we would end up waiting in
+                * the actual low level driver, we don't know how to wake it up.
+                */
+               spin_lock_irqsave(host->host_lock, flags);
+               temp = host->hostt->command(scmd);
+               spin_unlock_irqrestore(host->host_lock, flags);
+
+               scmd->result = temp;
+               /* fall through to code below to examine status. */
+       }
  
         /*
-        * Hey, we are done.  Let's look to see what happened.
+        * now examine the actual status codes to see whether the command
+        * actually did complete normally.
          */
-       return SCpnt->eh_state;
+       if (rtn == SUCCESS) {
+               int ret = scsi_eh_completed_normally(scmd);
+               SCSI_LOG_ERROR_RECOVERY(3,
+                       printk("%s: scsi_eh_completed_normally %x\n",
+                              __FUNCTION__, ret));
+               switch (ret) {
+               case SUCCESS:
+                       break;
+               case NEEDS_RETRY:
+                       goto retry;
+               case FAILED:
+               default:
+                       rtn = FAILED;
+                       break;
+               }
+       }
+
+       return rtn;
  }
  
-/*
- * Function:  scsi_request_sense()
- *
- * Purpose:     Request sense data from a particular target.
+/**
+ * scsi_request_sense - Request sense data from a particular target.
+ * @scmd:      SCSI cmd for request sense.
   *
- * Returns:     SUCCESS - we were able to get the sense data.
- *              FAILED  - we were not able to get the sense data.
- * 
- * Notes:       Some hosts automatically obtain this information, others
- *              require that we obtain it on our own.
- *
- *              This function will *NOT* return until the command either
- *              times out, or it completes.
- */
-STATIC int scsi_request_sense(Scsi_Cmnd * SCpnt)
+ * Notes:
+ *    Some hosts automatically obtain this information, others require
+ *    that we obtain it on our own. This function will *not* return until
+ *    the command either times out, or it completes.
+ **/
+static int scsi_request_sense(Scsi_Cmnd *scmd)
  {
         static unsigned char generic_sense[6] =
         {REQUEST_SENSE, 0, 0, 0, 255, 0};
         unsigned char scsi_result0[256], *scsi_result = NULL;
         int saved_result;
+       int rtn;
  
-       memcpy((void *) SCpnt->cmnd, (void *) generic_sense,
+       memcpy((void *) scmd->cmnd, (void *) generic_sense,
                sizeof(generic_sense));
  
-       if (SCpnt->device->scsi_level <= SCSI_2)
-               SCpnt->cmnd[1] = SCpnt->lun << 5;
+       if (scmd->device->scsi_level <= SCSI_2)
+               scmd->cmnd[1] = scmd->lun << 5;
  
-       scsi_result = (!SCpnt->host->hostt->unchecked_isa_dma)
+       scsi_result = (!scmd->host->hostt->unchecked_isa_dma)
             ? &scsi_result0[0] : kmalloc(512, GFP_ATOMIC | GFP_DMA);
  
         if (scsi_result == NULL) {
@@ -435,267 +626,275 @@ STATIC int scsi_request_sense(Scsi_Cmnd * SCpnt)
                 return FAILED;
         }
         /*
-        * Zero the sense buffer.  Some host adapters automatically always request
-        * sense, so it is not a good idea that SCpnt->request_buffer and
-        * SCpnt->sense_buffer point to the same address (DB).
-        * 0 is not a valid sense code. 
+        * zero the sense buffer.  some host adapters automatically always
+        * request sense, so it is not a good idea that
+        * scmd->request_buffer and scmd->sense_buffer point to the same
+        * address (db).  0 is not a valid sense code. 
          */
-       memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
+       memset((void *) scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
         memset((void *) scsi_result, 0, 256);
  
-       saved_result = SCpnt->result;
-       SCpnt->request_buffer = scsi_result;
-       SCpnt->request_bufflen = 256;
-       SCpnt->use_sg = 0;
-       SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
-       SCpnt->sc_data_direction = SCSI_DATA_READ;
-       SCpnt->underflow = 0;
+       saved_result = scmd->result;
+       scmd->request_buffer = scsi_result;
+       scmd->request_bufflen = 256;
+       scmd->use_sg = 0;
+       scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
+       scmd->sc_data_direction = SCSI_DATA_READ;
+       scmd->underflow = 0;
  
-       scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT);
+       rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT);
  
-       /* Last chance to have valid sense data */
-       if (!scsi_sense_valid(SCpnt))
-               memcpy((void *) SCpnt->sense_buffer,
-                      SCpnt->request_buffer,
-                      sizeof(SCpnt->sense_buffer));
+       /* last chance to have valid sense data */
+       if (!SCSI_SENSE_VALID(scmd))
+               memcpy((void *) scmd->sense_buffer,
+                      scmd->request_buffer,
+                      sizeof(scmd->sense_buffer));
  
         if (scsi_result != &scsi_result0[0] && scsi_result != NULL)
                 kfree(scsi_result);
  
         /*
-        * When we eventually call scsi_finish, we really wish to complete
-        * the original request, so let's restore the original data. (DB)
+        * when we eventually call scsi_finish, we really wish to complete
+        * the original request, so let's restore the original data. (db)
          */
-       memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
-              sizeof(SCpnt->data_cmnd));
-       SCpnt->result = saved_result;
-       SCpnt->request_buffer = SCpnt->buffer;
-       SCpnt->request_bufflen = SCpnt->bufflen;
-       SCpnt->use_sg = SCpnt->old_use_sg;
-       SCpnt->cmd_len = SCpnt->old_cmd_len;
-       SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
-       SCpnt->underflow = SCpnt->old_underflow;
+       memcpy((void *) scmd->cmnd, (void *) scmd->data_cmnd,
+              sizeof(scmd->data_cmnd));
+       scmd->result = saved_result;
+       scmd->request_buffer = scmd->buffer;
+       scmd->request_bufflen = scmd->bufflen;
+       scmd->use_sg = scmd->old_use_sg;
+       scmd->cmd_len = scmd->old_cmd_len;
+       scmd->sc_data_direction = scmd->sc_old_data_direction;
+       scmd->underflow = scmd->old_underflow;
  
         /*
-        * Hey, we are done.  Let's look to see what happened.
+        * hey, we are done.  let's look to see what happened.
          */
-       return SCpnt->eh_state;
+       return rtn;
  }
  
-/*
- * Function:  scsi_test_unit_ready()
+/**
+ * scsi_eh_retry_cmd - Retry the original command
+ * @scmd:      Original failed SCSI cmd.
   *
- * Purpose:     Run test unit ready command to see if the device is talking to us or not.
- *
- */
-STATIC int scsi_test_unit_ready(Scsi_Cmnd * SCpnt)
+ * Notes:
+ *    This function will *not* return until the command either times out,
+ *    or it completes.
+ **/
+static int scsi_eh_retry_cmd(Scsi_Cmnd *scmd)
  {
-       static unsigned char tur_command[6] =
-       {TEST_UNIT_READY, 0, 0, 0, 0, 0};
-
-       memcpy((void *) SCpnt->cmnd, (void *) tur_command,
-              sizeof(tur_command));
+       memcpy((void *) scmd->cmnd, (void *) scmd->data_cmnd,
+              sizeof(scmd->data_cmnd));
+       scmd->request_buffer = scmd->buffer;
+       scmd->request_bufflen = scmd->bufflen;
+       scmd->use_sg = scmd->old_use_sg;
+       scmd->cmd_len = scmd->old_cmd_len;
+       scmd->sc_data_direction = scmd->sc_old_data_direction;
+       scmd->underflow = scmd->old_underflow;
+
+       return scsi_send_eh_cmnd(scmd, scmd->timeout_per_command);
+}
  
-       if (SCpnt->device->scsi_level <= SCSI_2)
-               SCpnt->cmnd[1] = SCpnt->lun << 5;
+/**
+ * scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
+ * @scmd:      Original SCSI cmd that eh has finished.
+ * @shost:     SCSI host that cmd originally failed on.
+ *
+ * Notes:
+ *    We don't want to use the normal command completion while we are
+ *    still handling errors - it may cause other commands to be queued,
+ *    and that would disturb what we are doing.  thus we really want to
+ *    keep a list of pending commands for final completion, and once we
+ *    are ready to leave error handling we handle completion for real.
+ **/
+static void scsi_eh_finish_cmd(Scsi_Cmnd *scmd, struct Scsi_Host *shost)
+{
+       shost->host_failed--;
+       scmd->state = SCSI_STATE_BHQUEUE;
+       scsi_eh_eflags_clr_all(scmd);
  
         /*
-        * Zero the sense buffer.  The SCSI spec mandates that any
-        * untransferred sense data should be interpreted as being zero.
+        * set this back so that the upper level can correctly free up
+        * things.
          */
-       memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
+       scmd->use_sg = scmd->old_use_sg;
+       scmd->sc_data_direction = scmd->sc_old_data_direction;
+       scmd->underflow = scmd->old_underflow;
+}
  
-       SCpnt->request_buffer = NULL;
-       SCpnt->request_bufflen = 0;
-       SCpnt->use_sg = 0;
-       SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
-       SCpnt->underflow = 0;
-       SCpnt->sc_data_direction = SCSI_DATA_NONE;
+/**
+ * scsi_eh_get_sense - Get device sense data.
+ * @sc_todo:   list of cmds that have failed.
+ * @shost:     scsi host being recovered.
+ *
+ * Description:
+ *    See if we need to request sense information.  if so, then get it
+ *    now, so we have a better idea of what to do.  
+ *
+ *
+ * Notes:
+ *    This has the unfortunate side effect that if a host adapter does
+ *    not automatically request sense information, we end up shutting
+ *    it down before we request it.  All hosts should be doing this
+ *    anyways, so for now all I have to say is tough noogies if you end up
+ *    in here.  On second thought, this is probably a good idea.  We
+ *    *really* want to give authors an incentive to automatically request
+ *    this.
+ *
+ *    In 2.5 this capability will be going away.
+ **/
+static int scsi_eh_get_sense(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+{
+       int rtn;
+       Scsi_Cmnd *scmd;
  
-       scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT);
+       SCSI_LOG_ERROR_RECOVERY(3, printk("%s: checking to see if we need"
+                                         " to request sense\n",
+                                         __FUNCTION__));
  
-       /*
-        * When we eventually call scsi_finish, we really wish to complete
-        * the original request, so let's restore the original data. (DB)
-        */
-       memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
-              sizeof(SCpnt->data_cmnd));
-       SCpnt->request_buffer = SCpnt->buffer;
-       SCpnt->request_bufflen = SCpnt->bufflen;
-       SCpnt->use_sg = SCpnt->old_use_sg;
-       SCpnt->cmd_len = SCpnt->old_cmd_len;
-       SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
-       SCpnt->underflow = SCpnt->old_underflow;
+       for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
+               if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_FAILED) ||
+                   SCSI_SENSE_VALID(scmd))
+                       continue;
  
-       /*
-        * Hey, we are done.  Let's look to see what happened.
-        */
-       SCSI_LOG_ERROR_RECOVERY(3,
-               printk("scsi_test_unit_ready: SCpnt %p eh_state %x\n",
-               SCpnt, SCpnt->eh_state));
-       return SCpnt->eh_state;
-}
+               SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense"
+                                                 "for %d\n", __FUNCTION__,
+                                                 scmd->target));
+               rtn = scsi_request_sense(scmd);
+               if (rtn != SUCCESS)
+                       continue;
  
-/*
- * This would normally need to get the IO request lock,
- * but as it doesn't actually touch anything that needs
- * to be locked we can avoid the lock here..
- */
-STATIC
-void scsi_sleep_done(struct semaphore *sem)
-{
-       if (sem != NULL) {
-               up(sem);
-       }
-}
+               SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p"
+                                                 "- result %x\n", scmd,
+                                                 scmd->result));
+               SCSI_LOG_ERROR_RECOVERY(3, print_sense("bh", scmd));
  
-void scsi_sleep(int timeout)
-{
-       DECLARE_MUTEX_LOCKED(sem);
-       struct timer_list timer;
+               rtn = scsi_decide_disposition(scmd);
  
-       init_timer(&timer);
-       timer.data = (unsigned long) &sem;
-       timer.expires = jiffies + timeout;
-       timer.function = (void (*)(unsigned long)) scsi_sleep_done;
+               /*
+                * if the result was normal, then just pass it along to the
+                * upper level.
+                */
+               if (rtn == SUCCESS)
+                       scsi_eh_finish_cmd(scmd, shost);
+               if (rtn != NEEDS_RETRY)
+                       continue;
  
-       SCSI_LOG_ERROR_RECOVERY(5, printk("Sleeping for timer tics %d\n", timeout));
+               /*
+                * we only come in here if we want to retry a
+                * command.  the test to see whether the command
+                * should be retried should be keeping track of the
+                * number of tries, so we don't end up looping, of
+                * course.
+                */
+               scmd->state = NEEDS_RETRY;
+               rtn = scsi_eh_retry_cmd(scmd);
+               if (rtn != SUCCESS)
+                       continue;
  
-       add_timer(&timer);
+               /*
+                * we eventually hand this one back to the top level.
+                */
+               scsi_eh_finish_cmd(scmd, shost);
+       }
  
-       down(&sem);
-       del_timer(&timer);
+       return shost->host_failed;
  }
  
-/*
- * Function:  scsi_send_eh_cmnd
+/**
+ * scsi_try_to_abort_cmd - Ask host to abort a running command.
+ * @scmd:      SCSI cmd to abort from Lower Level.
   *
- * Purpose:     Send a command out to a device as part of error recovery.
- *
- * Notes:       The initialization of the structures is quite a bit different
- *              in this case, and furthermore, there is a different completion
- *              handler.
- */
-STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout)
+ * Notes:
+ *    This function will not return until the user's completion function
+ *    has been called.  there is no timeout on this operation.  if the
+ *    author of the low-level driver wishes this operation to be timed,
+ *    they can provide this facility themselves.  helper functions in
+ *    scsi_error.c can be supplied to make this easier to do.
+ **/
+static int scsi_try_to_abort_cmd(Scsi_Cmnd *scmd)
  {
+       int rtn = FAILED;
         unsigned long flags;
-       struct Scsi_Host *host = SCpnt->host;
-
-       ASSERT_LOCK(host->host_lock, 0);
  
-retry:
+       if (scmd->host->hostt->eh_abort_handler == NULL) {
+               return rtn;
+       }
         /*
-        * We will use a queued command if possible, otherwise we will
-        * emulate the queuing and calling of completion function ourselves.
+        * scsi_done was called just after the command timed out and before
+        * we had a chance to process it. (db)
          */
-       SCpnt->owner = SCSI_OWNER_LOWLEVEL;
-
-       if (host->can_queue) {
-               DECLARE_MUTEX_LOCKED(sem);
-
-               SCpnt->eh_state = SCSI_STATE_QUEUED;
+       if (scmd->serial_number == 0)
+               return SUCCESS;
  
-               scsi_add_timer(SCpnt, timeout, scsi_eh_times_out);
+       scmd->owner = SCSI_OWNER_LOWLEVEL;
  
-               /*
-                * Set up the semaphore so we wait for the command to complete.
-                */
-               SCpnt->host->eh_action = &sem;
-               SCpnt->request->rq_status = RQ_SCSI_BUSY;
+       spin_lock_irqsave(scmd->host->host_lock, flags);
+       rtn = scmd->host->hostt->eh_abort_handler(scmd);
+       spin_unlock_irqrestore(scmd->host->host_lock, flags);
+       return rtn;
+}
  
-               spin_lock_irqsave(SCpnt->host->host_lock, flags);
-               host->hostt->queuecommand(SCpnt, scsi_eh_done);
-               spin_unlock_irqrestore(SCpnt->host->host_lock, flags);
+/**
+ * scsi_eh_tur - Send TUR to device.
+ * @scmd:      Scsi cmd to send TUR
+ *
+ * Return value:
+ *    0 - valid sense, key not NOT_READY/UNIT_ATTENTION/ILLEGAL_REQUEST. 1 - otherwise.
+ **/
+static int scsi_eh_tur(Scsi_Cmnd *scmd)
+{
+       static unsigned char tur_command[6] =
+       {TEST_UNIT_READY, 0, 0, 0, 0, 0};
+       int rtn;
  
-               down(&sem);
+       memcpy((void *) scmd->cmnd, (void *) tur_command,
+              sizeof(tur_command));
  
-               SCpnt->host->eh_action = NULL;
+       if (scmd->device->scsi_level <= SCSI_2)
+               scmd->cmnd[1] = scmd->lun << 5;
  
-               /*
-                * See if timeout.  If so, tell the host to forget about it.
-                * In other words, we don't want a callback any more.
-                */
-               if (SCpnt->eh_state == SCSI_STATE_TIMEOUT) {
-                        SCpnt->owner = SCSI_OWNER_LOWLEVEL;
-
-                       /*
-                        * As far as the low level driver is
-                        * concerned, this command is still active, so
-                        * we must give the low level driver a chance
-                        * to abort it. (DB) 
-                        *
-                        * FIXME(eric) - we are not tracking whether we could
-                        * abort a timed out command or not.  Not sure how
-                        * we should treat them differently anyways.
-                        */
-                       spin_lock_irqsave(SCpnt->host->host_lock, flags);
-                       if (SCpnt->host->hostt->eh_abort_handler)
-                               SCpnt->host->hostt->eh_abort_handler(SCpnt);
-                       spin_unlock_irqrestore(SCpnt->host->host_lock, flags);
-                       
-                       SCpnt->request->rq_status = RQ_SCSI_DONE;
-                       SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
-                       
-                       SCpnt->eh_state = FAILED;
-               }
-               SCSI_LOG_ERROR_RECOVERY(5, printk("send_eh_cmnd: %p eh_state:%x\n",
-                                               SCpnt, SCpnt->eh_state));
-       } else {
-               int temp;
+       /*
+        * zero the sense buffer.  the scsi spec mandates that any
+        * untransferred sense data should be interpreted as being zero.
+        */
+       memset((void *) scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
  
-               /*
-                * We damn well had better never use this code.  There is no
-                * timeout protection here, since we would end up waiting in
-                * the actual low level driver, we don't know how to wake it up.
-                */
-               spin_lock_irqsave(host->host_lock, flags);
-               temp = host->hostt->command(SCpnt);
-               spin_unlock_irqrestore(host->host_lock, flags);
+       scmd->request_buffer = NULL;
+       scmd->request_bufflen = 0;
+       scmd->use_sg = 0;
+       scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
+       scmd->underflow = 0;
+       scmd->sc_data_direction = SCSI_DATA_NONE;
  
-               SCpnt->result = temp;
-               /* Fall through to code below to examine status. */
-               SCpnt->eh_state = SUCCESS;
-       }
+       rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT);
  
         /*
-        * Now examine the actual status codes to see whether the command
-        * actually did complete normally.
+        * when we eventually call scsi_finish, we really wish to complete
+        * the original request, so let's restore the original data. (db)
          */
-       if (SCpnt->eh_state == SUCCESS) {
-               int ret = scsi_eh_completed_normally(SCpnt);
-               SCSI_LOG_ERROR_RECOVERY(3,
-                       printk("scsi_send_eh_cmnd: scsi_eh_completed_normally %x\n", ret));
-               switch (ret) {
-               case SUCCESS:
-                       SCpnt->eh_state = SUCCESS;
-                       break;
-               case NEEDS_RETRY:
-                       goto retry;
-               case FAILED:
-               default:
-                       SCpnt->eh_state = FAILED;
-                       break;
-               }
-       } else {
-               SCpnt->eh_state = FAILED;
-       }
-}
+       memcpy((void *) scmd->cmnd, (void *) scmd->data_cmnd,
+              sizeof(scmd->data_cmnd));
+       scmd->request_buffer = scmd->buffer;
+       scmd->request_bufflen = scmd->bufflen;
+       scmd->use_sg = scmd->old_use_sg;
+       scmd->cmd_len = scmd->old_cmd_len;
+       scmd->sc_data_direction = scmd->sc_old_data_direction;
+       scmd->underflow = scmd->old_underflow;
  
-/*
- * Function:  scsi_unit_is_ready()
- *
- * Purpose:     Called after TEST_UNIT_READY is run, to test to see if
- *              the unit responded in a way that indicates it is ready.
- */
-STATIC int scsi_unit_is_ready(Scsi_Cmnd * SCpnt)
-{
-       if (SCpnt->result) {
-               if (((driver_byte(SCpnt->result) & DRIVER_SENSE) ||
-                    (status_byte(SCpnt->result) & CHECK_CONDITION)) &&
-                   ((SCpnt->sense_buffer[0] & 0x70) >> 4) == 7) {
-                       if (((SCpnt->sense_buffer[2] & 0xf) != NOT_READY) &&
-                           ((SCpnt->sense_buffer[2] & 0xf) != UNIT_ATTENTION) &&
-                           ((SCpnt->sense_buffer[2] & 0xf) != ILLEGAL_REQUEST)) {
+       /*
+        * hey, we are done.  let's look to see what happened.
+        */
+       SCSI_LOG_ERROR_RECOVERY(3,
+               printk("%s: scmd %p rtn %x\n",
+               __FUNCTION__, scmd, rtn));
+       if ((rtn == SUCCESS) && scmd->result) {
+               if (((driver_byte(scmd->result) & DRIVER_SENSE) ||
+                    (status_byte(scmd->result) & CHECK_CONDITION)) &&
+                   (SCSI_SENSE_VALID(scmd))) {
+                       if (((scmd->sense_buffer[2] & 0xf) != NOT_READY) &&
+                           ((scmd->sense_buffer[2] & 0xf) != UNIT_ATTENTION) &&
+                           ((scmd->sense_buffer[2] & 0xf) != ILLEGAL_REQUEST)) {
                                 return 0;
                         }
                 }
@@ -703,275 +902,385 @@ STATIC int scsi_unit_is_ready(Scsi_Cmnd * SCpnt)
         return 1;
  }
  
-/*
- * Function:    scsi_eh_finish_command
- *
- * Purpose:     Handle a command that we are finished with WRT error handling.
- *
- * Arguments:   SClist - pointer to list into which we are putting completed commands.
- *              SCpnt  - command that is completing
- *
- * Notes:       We don't want to use the normal command completion while we are
- *              are still handling errors - it may cause other commands to be queued,
- *              and that would disturb what we are doing.  Thus we really want to keep
- *              a list of pending commands for final completion, and once we
- *              are ready to leave error handling we handle completion for real.
- */
-STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt)
+/**
+ * scsi_eh_abort_cmd - abort a timed-out cmd.
+ * @sc_todo:   A list of cmds that have failed.
+ * @shost:     scsi host being recovered.
+ *
+ * Description:
+ *    Try and see whether or not it makes sense to try and abort the
+ *    running command.  this only works out to be the case if we have one
+ *    command that has timed out.  if the command simply failed, it makes
+ *    no sense to try and abort the command, since as far as the shost
+ *    adapter is concerned, it isn't running.
+ **/
+static int scsi_eh_abort_cmd(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
  {
-       SCpnt->state = SCSI_STATE_BHQUEUE;
-       SCpnt->bh_next = *SClist;
-       /*
-        * Set this back so that the upper level can correctly free up
-        * things.
-        */
-       SCpnt->use_sg = SCpnt->old_use_sg;
-       SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
-       SCpnt->underflow = SCpnt->old_underflow;
-       *SClist = SCpnt;
+       /* abort each timed-out cmd on sc_todo; finish those whose retry works */
+       int rtn;
+       Scsi_Cmnd *scmd;
+
+       SCSI_LOG_ERROR_RECOVERY(3, printk("%s: checking to see if we need"
+                                         " to abort cmd\n", __FUNCTION__));
+       /* sc_todo is walked through bh_next */
+       for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
+               if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_TIMEOUT))
+                       continue;       /* only SCSI_EH_CMD_TIMEOUT cmds are aborted */
+               /* abort ok -> scsi_eh_tur() non-zero -> retry ok -> finish */
+               rtn = scsi_try_to_abort_cmd(scmd);
+               if (rtn == SUCCESS) {
+                       if (scsi_eh_tur(scmd)) {
+                               rtn = scsi_eh_retry_cmd(scmd);
+                               if (rtn == SUCCESS)
+                                       scsi_eh_finish_cmd(scmd, shost);
+                       }
+               }
+       }
+       return shost->host_failed;      /* host_failed as it stands afterwards */
  }
  
-/*
- * Function:  scsi_try_to_abort_command
+/**
+ * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev
+ * @scmd:      SCSI cmd used to send BDR       
   *
- * Purpose:     Ask host adapter to abort a running command.
- *
- * Returns:     FAILED          Operation failed or not supported.
- *              SUCCESS         Succeeded.
- *
- * Notes:       This function will not return until the user's completion
- *              function has been called.  There is no timeout on this
- *              operation.  If the author of the low-level driver wishes
- *              this operation to be timed, they can provide this facility
- *              themselves.  Helper functions in scsi_error.c can be supplied
- *              to make this easier to do.
- *
- * Notes:       It may be possible to combine this with all of the reset
- *              handling to eliminate a lot of code duplication.  I don't
- *              know what makes more sense at the moment - this is just a
- *              prototype.
- */
-STATIC int scsi_try_to_abort_command(Scsi_Cmnd * SCpnt, int timeout)
+ * Notes:
+ *    There is no timeout for this operation.  if this operation is
+ *    unreliable for a given host, then the host itself needs to put a
+ *    timer on it, and set the host back to a consistent state prior to
+ *    returning.
+ **/
+static int scsi_try_bus_device_reset(Scsi_Cmnd *scmd)
  {
-       int rtn;
         unsigned long flags;
+       int rtn = FAILED;       /* returned as-is when the host has no handler */
  
-       SCpnt->eh_state = FAILED;       /* Until we come up with something better */
-
-       if (SCpnt->host->hostt->eh_abort_handler == NULL) {
-               return FAILED;
+       if (scmd->host->hostt->eh_device_reset_handler == NULL) {
+               return rtn;
         }
-       /* 
-        * scsi_done was called just after the command timed out and before
-        * we had a chance to process it. (DB)
-        */
-       if (SCpnt->serial_number == 0)
-               return SUCCESS;
+       scmd->owner = SCSI_OWNER_LOWLEVEL;
  
-       SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+       spin_lock_irqsave(scmd->host->host_lock, flags); /* handler runs under host_lock */
+       rtn = scmd->host->hostt->eh_device_reset_handler(scmd);
+       spin_unlock_irqrestore(scmd->host->host_lock, flags);
  
-       spin_lock_irqsave(SCpnt->host->host_lock, flags);
-       rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt);
-       spin_unlock_irqrestore(SCpnt->host->host_lock, flags);
         return rtn;
  }
  
-/*
- * Function:  scsi_try_bus_device_reset
- *
- * Purpose:     Ask host adapter to perform a bus device reset for a given
- *              device.
+/**
+ * scsi_eh_bus_device_reset - send bdr if needed
+ * @sc_todo:   a list of cmds that have failed.
+ * @shost:     scsi host being recovered.
   *
- * Returns:     FAILED          Operation failed or not supported.
- *              SUCCESS         Succeeded.
- *
- * Notes:       There is no timeout for this operation.  If this operation is
- *              unreliable for a given host, then the host itself needs to put a
- *              timer on it, and set the host back to a consistent state prior
- *              to returning.
- */
-STATIC int scsi_try_bus_device_reset(Scsi_Cmnd * SCpnt, int timeout)
+ * Notes:
+ *    Try a bus device reset.  still, look to see whether we have multiple
+ *    devices that are jammed or not - if we have multiple devices, it
+ *    makes no sense to try bus_device_reset - we really would need to try
+ *    a bus_reset instead. 
+ **/
+static int scsi_eh_bus_device_reset(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
  {
-       unsigned long flags;
         int rtn;
+       Scsi_Cmnd *scmd;        /* cursor over sc_todo (linked by bh_next) */
+       Scsi_Device *sdev;      /* cursor over shost->host_queue */
  
-       SCpnt->eh_state = FAILED;       /* Until we come up with something better */
+       SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Trying BDR\n", __FUNCTION__));
  
-       if (SCpnt->host->hostt->eh_device_reset_handler == NULL) {
-               return FAILED;
-       }
-       SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+       for (sdev = shost->host_queue; sdev; sdev = sdev->next) {
+               for (scmd = sc_todo; scmd; scmd = scmd->bh_next)
+                       if ((scmd->device == sdev) &&
+                           scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR))
+                               break;
  
-       spin_lock_irqsave(SCpnt->host->host_lock, flags);
-       rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt);
-       spin_unlock_irqrestore(SCpnt->host->host_lock, flags);
+               if (!scmd)      /* no SCSI_EH_CMD_ERR cmd for this device */
+                       continue;
  
-       if (rtn == SUCCESS)
-               SCpnt->eh_state = SUCCESS;
+               /*
+                * scmd is the first SCSI_EH_CMD_ERR cmd found for sdev: send one
+                * reset through it, then retry every such cmd on sdev.
+                */
+               rtn = scsi_try_bus_device_reset(scmd);
+               if ((rtn == SUCCESS) && (scsi_eh_tur(scmd)))
+                               for (scmd = sc_todo; scmd; scmd = scmd->bh_next)
+                                       if ((scmd->device == sdev) &&
+                                           scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)) {
+                                               rtn = scsi_eh_retry_cmd(scmd);
+                                               if (rtn == SUCCESS)
+                                                       scsi_eh_finish_cmd(scmd, shost);
+                                       }
+       }
  
-       return SCpnt->eh_state;
+       return shost->host_failed;      /* host_failed as it stands afterwards */
  }
  
-/*
- * Function:  scsi_try_bus_reset
- *
- * Purpose:     Ask host adapter to perform a bus reset for a host.
- *
- * Returns:     FAILED          Operation failed or not supported.
- *              SUCCESS         Succeeded.
- *
- * Notes:       
- */
-STATIC int scsi_try_bus_reset(Scsi_Cmnd * SCpnt)
+/**
+ * scsi_try_bus_reset - ask host to perform a bus reset
+ * @scmd:      SCSI cmd to send bus reset.
+ **/
+static int scsi_try_bus_reset(Scsi_Cmnd *scmd)
  {
         unsigned long flags;
         int rtn;
+       Scsi_Device *sdev;
  
-       SCpnt->eh_state = FAILED;       /* Until we come up with something better */
-       SCpnt->owner = SCSI_OWNER_LOWLEVEL;
-       SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+       SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",
+                                         __FUNCTION__));
+       scmd->owner = SCSI_OWNER_LOWLEVEL;      /* set before the handler check */
+       scmd->serial_number_at_timeout = scmd->serial_number;
  
-       if (SCpnt->host->hostt->eh_bus_reset_handler == NULL) {
+       if (scmd->host->hostt->eh_bus_reset_handler == NULL)
                 return FAILED;
-       }
  
-       spin_lock_irqsave(SCpnt->host->host_lock, flags);
-       rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt);
-       spin_unlock_irqrestore(SCpnt->host->host_lock, flags);
+       spin_lock_irqsave(scmd->host->host_lock, flags); /* handler runs under host_lock */
+       rtn = scmd->host->hostt->eh_bus_reset_handler(scmd);
+       spin_unlock_irqrestore(scmd->host->host_lock, flags);
  
-       if (rtn == SUCCESS)
-               SCpnt->eh_state = SUCCESS;
-
-       /*
-        * If we had a successful bus reset, mark the command blocks to expect
-        * a condition code of unit attention.
-        */
-       scsi_sleep(BUS_RESET_SETTLE_TIME);
-       if (SCpnt->eh_state == SUCCESS) {
-               Scsi_Device *SDloop;
-               for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) {
-                       if (SCpnt->channel == SDloop->channel) {
-                               SDloop->was_reset = 1;
-                               SDloop->expecting_cc_ua = 1;
+       if (rtn == SUCCESS) {
+               scsi_sleep(BUS_RESET_SETTLE_TIME);      /* settle only after SUCCESS */
+               /*
+                * Set was_reset and expecting_cc_ua on each device of scmd's channel.
+                */
+               for (sdev = scmd->host->host_queue; sdev; sdev = sdev->next)
+                       if (scmd->channel == sdev->channel) {
+                               sdev->was_reset = 1;
+                               sdev->expecting_cc_ua = 1;
                         }
-               }
         }
-       return SCpnt->eh_state;
+       return rtn;     /* the handler's own return value */
  }
  
-/*
- * Function:  scsi_try_host_reset
- *
- * Purpose:     Ask host adapter to reset itself, and the bus.
- *
- * Returns:     FAILED          Operation failed or not supported.
- *              SUCCESS         Succeeded.
- *
- * Notes:
- */
-STATIC int scsi_try_host_reset(Scsi_Cmnd * SCpnt)
+/**
+ * scsi_try_host_reset - ask host adapter to reset itself
+ * @scmd:      SCSI cmd to send host reset.
+ **/
+static int scsi_try_host_reset(Scsi_Cmnd *scmd)
  {
         unsigned long flags;
         int rtn;
+        Scsi_Device *sdev;
  
-       SCpnt->eh_state = FAILED;       /* Until we come up with something better */
-       SCpnt->owner = SCSI_OWNER_LOWLEVEL;
-       SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+       SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",
+                                         __FUNCTION__));
+       scmd->owner = SCSI_OWNER_LOWLEVEL;      /* set before the handler check */
+       scmd->serial_number_at_timeout = scmd->serial_number;
  
-       if (SCpnt->host->hostt->eh_host_reset_handler == NULL) {
+       if (scmd->host->hostt->eh_host_reset_handler == NULL)
                 return FAILED;
+       /* the handler is called with host_lock held */
+       spin_lock_irqsave(scmd->host->host_lock, flags);
+       rtn = scmd->host->hostt->eh_host_reset_handler(scmd);
+       spin_unlock_irqrestore(scmd->host->host_lock, flags);
+
+       if (rtn == SUCCESS) {
+               scsi_sleep(HOST_RESET_SETTLE_TIME);     /* settle only after SUCCESS */
+               /*
+                * NOTE(review): only scmd->channel is flagged; old code did all.
+                */
+               for (sdev = scmd->host->host_queue; sdev; sdev = sdev->next)
+                       if (scmd->channel == sdev->channel) {
+                               sdev->was_reset = 1;
+                               sdev->expecting_cc_ua = 1;
+                       }
         }
-       spin_lock_irqsave(SCpnt->host->host_lock, flags);
-       rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt);
-       spin_unlock_irqrestore(SCpnt->host->host_lock, flags);
+       return rtn;     /* the handler's own return value */
+}
  
-       if (rtn == SUCCESS)
-               SCpnt->eh_state = SUCCESS;
+/**
+ * scsi_eh_bus_host_reset - send a bus reset and on failure try host reset
+ * @sc_todo:   a list of cmds that have failed.
+ * @shost:     scsi host being recovered.
+ **/
+static int scsi_eh_bus_host_reset(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+{
+       int rtn;
+       Scsi_Cmnd *scmd;        /* list cursor, and the cmd a reset is sent through */
+       unsigned int channel;
  
         /*
-        * If we had a successful host reset, mark the command blocks to expect
-        * a condition code of unit attention.
+        * if we ended up here, we have serious problems.  the only thing left
+        * to try is a full bus reset.  if someone has grabbed the bus and isn't
+        * letting go, then perhaps this will help.
+        */
+       SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Try Bus/Host RST\n",
+                                         __FUNCTION__));
+
+       /*
+        * we really want to loop over the various channels, and do this on
+        * a channel by channel basis.  we should also check to see if any
+        * of the failed commands are on soft_reset devices, and if so, skip
+        * the reset.
          */
-       scsi_sleep(HOST_RESET_SETTLE_TIME);
-       if (SCpnt->eh_state == SUCCESS) {
-               Scsi_Device *SDloop;
-               for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) {
-                       SDloop->was_reset = 1;
-                       SDloop->expecting_cc_ua = 1;
+
+       for (channel = 0; channel <= shost->max_channel; channel++) {
+               for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
+                       if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR))
+                               continue;
+                       if (channel == scmd->channel) {
+                               /* first SCSI_EH_CMD_ERR cmd on this channel */
+                               break;
+                               /*
+                                * FIXME add back in some support for
+                                * soft_reset devices.
+                                */
+                       }
                 }
+
+               /* scmd is NULL when the search above ran off the list */
+               if (!scmd)
+                       continue;
+
+               /*
+                * we now know that we are able to perform a reset for the
+                * channel that scmd points to.
+                */
+               rtn = scsi_try_bus_reset(scmd);
+               if (rtn != SUCCESS)
+                       rtn = scsi_try_host_reset(scmd);
+
+               if (rtn == SUCCESS) {
+                       for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
+                               if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)
+                                   || channel != scmd->channel)
+                                       continue;
+                               if (scsi_eh_tur(scmd)) {
+                                       rtn = scsi_eh_retry_cmd(scmd);
+
+                                       if (rtn == SUCCESS)
+                                               scsi_eh_finish_cmd(scmd, shost);
+                               }
+                       }
+               }
+
         }
-       return SCpnt->eh_state;
+       return shost->host_failed;      /* host_failed as it stands afterwards */
  }
  
-/*
- * Function:  scsi_decide_disposition
+/**
+ * scsi_eh_offline_sdevs - offline scsi devices that fail to recover
+ * @sc_todo:   a list of cmds that have failed.
+ * @shost:     scsi host being recovered.
   *
- * Purpose:     Examine a command block that has come back from the low-level
- *              and figure out what to do next.
+ **/
+static void scsi_eh_offline_sdevs(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+{
+       Scsi_Cmnd *scmd;
+       /* each cmd still flagged SCSI_EH_CMD_ERR takes its device offline */
+       for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
+               if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR))
+                       continue;
+               /* adjacent literals are joined as-is: keep the trailing spaces */
+               printk(KERN_INFO "%s: Device set offline - not "
+                               "ready or command retry failed "
+                               "after error recovery: host "
+                               "%d channel %d id %d lun %d\n",
+                               __FUNCTION__, shost->host_no,
+                               scmd->device->channel,
+                               scmd->device->id,
+                               scmd->device->lun);
+               scmd->device->online = FALSE;
+               scsi_eh_finish_cmd(scmd, shost);
+       }
+       return;
+}
+
+/**
+ * scsi_sleep_done - timer function for scsi_sleep
+ * @sem:       semaphore to signal
   *
- * Returns:     SUCCESS         - pass on to upper level.
- *              FAILED          - pass on to error handler thread.
- *              RETRY           - command should be retried.
- *              SOFTERR         - command succeeded, but we need to log
- *                                a soft error.
+ **/
+static void scsi_sleep_done(struct semaphore *sem)
+{
+       /* a NULL semaphore means there is nobody to signal */
+       if (!sem)
+               return;
+       up(sem);
+}
+
+/**
+ * scsi_sleep - sleep for specified timeout
+ * @timeout:   timeout value
   *
- * Notes:       This is *ONLY* called when we are examining the status
- *              after sending out the actual data command.  Any commands
- *              that are queued for error recovery (i.e. TEST_UNIT_READY)
- *              do *NOT* come through here.
+ **/
+void scsi_sleep(int timeout)
+{
+       struct timer_list wake_timer;
+       DECLARE_MUTEX_LOCKED(wake_sem);
+
+       /* timer set up to call scsi_sleep_done(&wake_sem) at jiffies + timeout */
+       init_timer(&wake_timer);
+       wake_timer.function = (void (*)(unsigned long)) scsi_sleep_done;
+       wake_timer.expires = jiffies + timeout;
+       wake_timer.data = (unsigned long) &wake_sem;
+
+       SCSI_LOG_ERROR_RECOVERY(5, printk("sleeping for timer tics %d\n",
+                                         timeout));
+       add_timer(&wake_timer);
+       /* wake_sem is declared locked, so down() waits for the handler's up() */
+       down(&wake_sem);
+       del_timer(&wake_timer);
+}
+
+/**
+ * scsi_decide_disposition - Disposition a cmd on return from LLD.
+ * @scmd:      SCSI cmd to examine.
   *
- *              NOTE - When this routine returns FAILED, it means the error
- *              handler thread is woken.  In cases where the error code
- *              indicates an error that doesn't require the error handler
- *              thread (i.e. we don't need to abort/reset), then this function
- *              should return SUCCESS.
- */
-int scsi_decide_disposition(Scsi_Cmnd * SCpnt)
+ * Notes:
+ *    This is *only* called when we are examining the status after sending
+ *    out the actual data command.  any commands that are queued for error
+ *    recovery (i.e. TEST_UNIT_READY) do *not* come through here.
+ *
+ *    When this routine returns FAILED, it means the error handler thread
+ *    is woken.  in cases where the error code indicates an error that
+ *    doesn't require the error handler thread (i.e. we don't need to
+ *    abort/reset), then this function should return SUCCESS.
+ **/
+int scsi_decide_disposition(Scsi_Cmnd *scmd)
  {
         int rtn;
  
         /*
-        * If the device is offline, then we clearly just pass the result back
+        * if the device is offline, then we clearly just pass the result back
          * up to the top level.
          */
-       if (SCpnt->device->online == FALSE) {
-               SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: device offline - report as SUCCESS\n"));
+       if (scmd->device->online == FALSE) {
+               SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report"
+                                                 "as SUCCESS\n",
+                                                 __FUNCTION__));
                 return SUCCESS;
         }
         /*
-        * First check the host byte, to see if there is anything in there
+        * first check the host byte, to see if there is anything in there
          * that would indicate what we need to do.
          */
  
-       switch (host_byte(SCpnt->result)) {
+       switch (host_byte(scmd->result)) {
         case DID_PASSTHROUGH:
                 /*
-                * No matter what, pass this through to the upper layer.
-                * Nuke this special code so that it looks like we are saying
-                * DID_OK.
+                * no matter what, pass this through to the upper layer.
+                * nuke this special code so that it looks like we are saying
+                * DID_OK.
                  */
-               SCpnt->result &= 0xff00ffff;
+               scmd->result &= 0xff00ffff;
                 return SUCCESS;
         case DID_OK:
                 /*
-                * Looks good.  Drop through, and check the next byte.
+                * looks good.  drop through, and check the next byte.
                  */
                 break;
         case DID_NO_CONNECT:
         case DID_BAD_TARGET:
         case DID_ABORT:
                 /*
-                * Note - this means that we just report the status back to the
-                * top level driver, not that we actually think that it indicates
-                * success.
+                * note - this means that we just report the status back
+                * to the top level driver, not that we actually think
+                * that it indicates SUCCESS.
                  */
                 return SUCCESS;
                 /*
-                * When the low level driver returns DID_SOFT_ERROR,
+                * when the low level driver returns DID_SOFT_ERROR,
                  * it is responsible for keeping an internal retry counter 
-                * in order to avoid endless loops (DB)
+                * in order to avoid endless loops (DB)
                  *
-                * Actually this is a bug in this function here.  We should
+                * actually this is a bug in this function here.  we should
                  * be mindful of the maximum number of retries specified
                  * and not get stuck in a loop.
                  */
@@ -979,896 +1288,252 @@ int scsi_decide_disposition(Scsi_Cmnd * SCpnt)
                 goto maybe_retry;
  
         case DID_ERROR:
-               if (msg_byte(SCpnt->result) == COMMAND_COMPLETE &&
-                   status_byte(SCpnt->result) == RESERVATION_CONFLICT)
+               if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
+                   status_byte(scmd->result) == RESERVATION_CONFLICT)
                         /*
                          * execute reservation conflict processing code
                          * lower down
                          */
                         break;
-               /* FALLTHROUGH */
+               /* fallthrough */
  
         case DID_BUS_BUSY:
         case DID_PARITY:
                 goto maybe_retry;
         case DID_TIME_OUT:
                 /*
-                * When we scan the bus, we get timeout messages for
+                * when we scan the bus, we get timeout messages for
                  * these commands if there is no device available.
-                * Other hosts report DID_NO_CONNECT for the same thing.
-                */
-               if ((SCpnt->cmnd[0] == TEST_UNIT_READY ||
-                    SCpnt->cmnd[0] == INQUIRY)) {
-                       return SUCCESS;
-               } else {
-                       return FAILED;
-               }
-       case DID_RESET:
-               /*
-                * In the normal case where we haven't initiated a reset, this is
-                * a failure.
-                */
-               if (SCpnt->flags & IS_RESETTING) {
-                       SCpnt->flags &= ~IS_RESETTING;
-                       goto maybe_retry;
-               }
-               return SUCCESS;
-       default:
-               return FAILED;
-       }
-
-       /*
-        * Next, check the message byte.
-        */
-       if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) {
-               return FAILED;
-       }
-       /*
-        * Now, check the status byte to see if this indicates anything special.
-        */
-       switch (status_byte(SCpnt->result)) {
-       case QUEUE_FULL:
-               /*
-                * The case of trying to send too many commands to a tagged queueing
-                * device.
+                * other hosts report DID_NO_CONNECT for the same thing.
                  */
-               return ADD_TO_MLQUEUE;
-       case GOOD:
-       case COMMAND_TERMINATED:
-               return SUCCESS;
-       case CHECK_CONDITION:
-               rtn = scsi_check_sense(SCpnt);
-               if (rtn == NEEDS_RETRY) {
-                       goto maybe_retry;
-               }
-               return rtn;
-       case CONDITION_GOOD:
-       case INTERMEDIATE_GOOD:
-       case INTERMEDIATE_C_GOOD:
-               /*
-                * Who knows?  FIXME(eric)
-                */
-               return SUCCESS;
-       case BUSY:
-               goto maybe_retry;
-
-       case RESERVATION_CONFLICT:
-               printk("scsi%d (%d,%d,%d) : RESERVATION CONFLICT\n", 
-                      SCpnt->host->host_no, SCpnt->channel,
-                      SCpnt->device->id, SCpnt->device->lun);
-               return SUCCESS; /* causes immediate I/O error */
-       default:
-               return FAILED;
-       }
-       return FAILED;
-
-      maybe_retry:
-
-       if ((++SCpnt->retries) < SCpnt->allowed) {
-               return NEEDS_RETRY;
-       } else {
-                /*
-                 * No more retries - report this one back to upper level.
-                 */
-               return SUCCESS;
-       }
-}
-
-/*
- * Function:  scsi_eh_completed_normally
- *
- * Purpose:     Examine a command block that has come back from the low-level
- *              and figure out what to do next.
- *
- * Returns:     SUCCESS         - pass on to upper level.
- *              FAILED          - pass on to error handler thread.
- *              RETRY           - command should be retried.
- *              SOFTERR         - command succeeded, but we need to log
- *                                a soft error.
- *
- * Notes:       This is *ONLY* called when we are examining the status
- *              of commands queued during error recovery.  The main
- *              difference here is that we don't allow for the possibility
- *              of retries here, and we are a lot more restrictive about what
- *              we consider acceptable.
- */
-STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt)
-{
-       int rtn;
-
-       /*
-        * First check the host byte, to see if there is anything in there
-        * that would indicate what we need to do.
-        */
-       if (host_byte(SCpnt->result) == DID_RESET) {
-               if (SCpnt->flags & IS_RESETTING) {
-                       /*
-                        * OK, this is normal.  We don't know whether in fact
-                        * the command in question really needs to be rerun
-                        * or not - if this was the original data command then
-                        * the answer is yes, otherwise we just flag it as
-                        * success.
-                        */
-                       SCpnt->flags &= ~IS_RESETTING;
-                       goto maybe_retry;
+               if ((scmd->cmnd[0] == TEST_UNIT_READY ||
+                    scmd->cmnd[0] == INQUIRY)) {
+                       return SUCCESS;
+               } else {
+                       return FAILED;
                 }
+       case DID_RESET:
                 /*
-                * Rats.  We are already in the error handler, so we now
-                * get to try and figure out what to do next.  If the sense
-                * is valid, we have a pretty good idea of what to do.
-                * If not, we mark it as failed.
+                * in the normal case where we haven't initiated a reset,
+                * this is a failure.
                  */
-               rtn = scsi_check_sense(SCpnt);
-               if (rtn == NEEDS_RETRY)
+               if (scmd->flags & IS_RESETTING) {
+                       scmd->flags &= ~IS_RESETTING;
                         goto maybe_retry;
-               return rtn;
-       }
-       if (host_byte(SCpnt->result) != DID_OK) {
+               }
+               return SUCCESS;
+       default:
                 return FAILED;
         }
+
         /*
-        * Next, check the message byte.
+        * next, check the message byte.
          */
-       if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) {
+       if (msg_byte(scmd->result) != COMMAND_COMPLETE) {
                 return FAILED;
         }
         /*
-        * Now, check the status byte to see if this indicates
-        * anything special.
+        * now, check the status byte to see if this indicates anything special.
          */
-       switch (status_byte(SCpnt->result)) {
+       switch (status_byte(scmd->result)) {
+       case QUEUE_FULL:
+               /*
+                * the case of trying to send too many commands to a
+                * tagged queueing device.
+                */
+               return ADD_TO_MLQUEUE;
         case GOOD:
         case COMMAND_TERMINATED:
                 return SUCCESS;
         case CHECK_CONDITION:
-               rtn = scsi_check_sense(SCpnt);
-               if (rtn == NEEDS_RETRY)
+               rtn = scsi_check_sense(scmd);
+               if (rtn == NEEDS_RETRY) {
                         goto maybe_retry;
+               }
                 return rtn;
         case CONDITION_GOOD:
         case INTERMEDIATE_GOOD:
         case INTERMEDIATE_C_GOOD:
                 /*
-                * Who knows?  FIXME(eric)
+                * who knows?  FIXME(eric)
                  */
                 return SUCCESS;
         case BUSY:
-       case QUEUE_FULL:
+               goto maybe_retry;
+
         case RESERVATION_CONFLICT:
+               printk("scsi%d (%d,%d,%d) : reservation conflict\n", 
+                      scmd->host->host_no, scmd->channel,
+                      scmd->device->id, scmd->device->lun);
+               return SUCCESS; /* causes immediate i/o error */
         default:
                 return FAILED;
         }
         return FAILED;
  
- maybe_retry:
-       if ((++SCpnt->retries) < SCpnt->allowed) {
-               return NEEDS_RETRY;
-       } else {
-               /* No more retries - report this one back to upper level */
-               return SUCCESS;
-       }
-}
-
-/*
- * Function:  scsi_check_sense
- *
- * Purpose:     Examine sense information - give suggestion as to what
- *              we should do with it.
- */
-STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt)
-{
-       if (!scsi_sense_valid(SCpnt)) {
-               return FAILED;
-       }
-       if (SCpnt->sense_buffer[2] & 0xe0)
-               return SUCCESS;
-
-       switch (SCpnt->sense_buffer[2] & 0xf) {
-       case NO_SENSE:
-               return SUCCESS;
-       case RECOVERED_ERROR:
-               return /* SOFT_ERROR */ SUCCESS;
+      maybe_retry:
  
-       case ABORTED_COMMAND:
+       if ((++scmd->retries) < scmd->allowed) {
                 return NEEDS_RETRY;
-       case NOT_READY:
-       case UNIT_ATTENTION:
-               /*
-                * If we are expecting a CC/UA because of a bus reset that we
-                * performed, treat this just as a retry.  Otherwise this is
-                * information that we should pass up to the upper-level driver
-                * so that we can deal with it there.
-                */
-               if (SCpnt->device->expecting_cc_ua) {
-                       SCpnt->device->expecting_cc_ua = 0;
-                       return NEEDS_RETRY;
-               }
+       } else {
                 /*
-                * If the device is in the process of becoming ready, we 
-                * should retry.
+                * no more retries - report this one back to upper level.
                  */
-               if ((SCpnt->sense_buffer[12] == 0x04) &&
-                       (SCpnt->sense_buffer[13] == 0x01)) {
-                       return NEEDS_RETRY;
-               }
-               return SUCCESS;
-
-               /* these three are not supported */
-       case COPY_ABORTED:
-       case VOLUME_OVERFLOW:
-       case MISCOMPARE:
-               return SUCCESS;
-
-       case MEDIUM_ERROR:
-               return NEEDS_RETRY;
-
-       case ILLEGAL_REQUEST:
-       case BLANK_CHECK:
-       case DATA_PROTECT:
-       case HARDWARE_ERROR:
-       default:
                 return SUCCESS;
         }
  }
  
-
-/*
- * Function:  scsi_restart_operations
- *
- * Purpose:     Restart IO operations to the specified host.
- *
- * Arguments:   host  - host that we are restarting
- *
- * Lock status: Assumed that locks are not held upon entry.
+/**
+ * scsi_restart_operations - restart io operations to the specified host.
+ * @shost:     Host we are restarting.
   *
- * Returns:     Nothing
- *
- * Notes:       When we entered the error handler, we blocked all further
- *              I/O to this device.  We need to 'reverse' this process.
- */
-STATIC void scsi_restart_operations(struct Scsi_Host *host)
+ * Notes:
+ *    When we entered the error handler, we blocked all further i/o to
+ *    this device.  We need to 'reverse' this process.
+ **/
+static void scsi_restart_operations(struct Scsi_Host *shost)
  {
-       Scsi_Device *SDpnt;
+       Scsi_Device *sdev;
         unsigned long flags;
  
-       ASSERT_LOCK(host->host_lock, 0);
+       ASSERT_LOCK(shost->host_lock, 0);
  
         /*
-        * Next free up anything directly waiting upon the host.  This will be
-        * requests for character device operations, and also for ioctls to queued
-        * block devices.
+        * Next free up anything directly waiting upon the host.  This
+        * will be requests for character device operations, and also for
+        * ioctls to queued block devices.
         */
-       SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: Waking up host to restart\n"));
+       SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
+                                         __FUNCTION__));
  
-       wake_up(&host->host_wait);
+       wake_up(&shost->host_wait);
  
         /*
-        * Finally we need to re-initiate requests that may be pending.  We will
+        * Finally we need to re-initiate requests that may be pending.  We will
          * have had everything blocked while error handling is taking place, and
          * now that error recovery is done, we will need to ensure that these
          * requests are started.
          */
-       spin_lock_irqsave(host->host_lock, flags);
-       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
-               request_queue_t *q = &SDpnt->request_queue;
-
-               if ((host->can_queue > 0 && (host->host_busy >= host->can_queue))
-                   || (host->host_blocked)
-                   || (host->host_self_blocked)
-                   || (SDpnt->device_blocked)) {
+       spin_lock_irqsave(shost->host_lock, flags);
+       for (sdev = shost->host_queue; sdev; sdev = sdev->next) {
+               request_queue_t *q = &sdev->request_queue;
+
+               if ((shost->can_queue > 0 &&
+                    (shost->host_busy >= shost->can_queue))
+                   || (shost->host_blocked)
+                   || (shost->host_self_blocked)
+                   || (sdev->device_blocked)) {
                         break;
                 }
  
                 q->request_fn(q);
         }
-       spin_unlock_irqrestore(host->host_lock, flags);
+       spin_unlock_irqrestore(shost->host_lock, flags);
  }
  
-/*
- * Function:  scsi_unjam_host
- *
- * Purpose:     Attempt to fix a host which has a command that failed for
- *              some reason.
- *
- * Arguments:   host    - host that needs unjamming.
- * 
- * Returns:     Nothing
- *
- * Notes:       When we come in here, we *know* that all commands on the
- *              bus have either completed, failed or timed out.  We also
- *              know that no further commands are being sent to the host,
- *              so things are relatively quiet and we have freedom to
- *              fiddle with things as we wish.
+/**
+ * scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
+ * @shost:     Host to unjam.
   *
- * Additional note:  This is only the *default* implementation.  It is possible
- *              for individual drivers to supply their own version of this
- *              function, and if the maintainer wishes to do this, it is
- *              strongly suggested that this function be taken as a template
- *              and modified.  This function was designed to correctly handle
- *              problems for about 95% of the different cases out there, and
- *              it should always provide at least a reasonable amount of error
- *              recovery.
- *
- * Note3:       Any command marked 'FAILED' or 'TIMEOUT' must eventually
- *              have scsi_finish_command() called for it.  We do all of
- *              the retry stuff here, so when we restart the host after we
- *              return it should have an empty queue.
- */
-STATIC int scsi_unjam_host(struct Scsi_Host *host)
+ * Notes:
+ *    When we come in here, we *know* that all commands on the bus have
+ *    either completed, failed or timed out.  We also know that no further
+ *    commands are being sent to the host, so things are relatively quiet
+ *    and we have freedom to fiddle with things as we wish.
+ *
+ *    This is only the *default* implementation.  It is possible for
+ *    individual drivers to supply their own version of this function, and
+ *    if the maintainer wishes to do this, it is strongly suggested that
+ *    this function be taken as a template and modified.  This function
+ *    was designed to correctly handle problems for about 95% of the
+ *    different cases out there, and it should always provide at least a
+ *    reasonable amount of error recovery.
+ *
+ *    Any command marked 'failed' or 'timeout' must eventually have
+ *    scsi_finish_cmd() called for it.  We do all of the retry stuff
+ *    here, so when we restart the host after we return it should have an
+ *    empty queue.
+ **/
+static void scsi_unjam_host(struct Scsi_Host *shost)
  {
-       int devices_failed;
-       int numfailed;
-       int ourrtn;
-       int rtn = FALSE;
-       int result;
-       Scsi_Cmnd *SCloop;
-       Scsi_Cmnd *SCpnt;
-       Scsi_Device *SDpnt;
-       Scsi_Device *SDloop;
-       Scsi_Cmnd *SCdone;
-       int timed_out;
-
-       ASSERT_LOCK(host->host_lock, 0);
-
-       SCdone = NULL;
-
-       /*
-        * First, protect against any sort of race condition.  If any of the outstanding
-        * commands are in states that indicate that we are not yet blocked (i.e. we are
-        * not in a quiet state) then we got woken up in error.  If we ever end up here,
-        * we need to re-examine some of the assumptions.
-        */
-       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
-               for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
-                       if (SCpnt->state == SCSI_STATE_FAILED
-                           || SCpnt->state == SCSI_STATE_TIMEOUT
-                           || SCpnt->state == SCSI_STATE_INITIALIZING
-                           || SCpnt->state == SCSI_STATE_UNUSED) {
-                               continue;
-                       }
-                       /*
-                        * Rats.  Something is still floating around out there.  This could
-                        * be the result of the fact that the upper level drivers are still frobbing
-                        * commands that might have succeeded.  There are two outcomes.  One is that
-                        * the command block will eventually be freed, and the other one is that
-                        * the command will be queued and will be finished along the way.
-                        */
-                       SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target));
-
-/*
- *        panic("SCSI Error handler woken too early\n");
- *
- * This is no longer a problem, since now the code cares only about
- * SCSI_STATE_TIMEOUT and SCSI_STATE_FAILED.
- * Other states are useful only to release active commands when devices are
- * set offline. If (host->host_active == host->host_busy) we can safely assume
- * that there are no commands in state other then TIMEOUT od FAILED. (DB)
- *
- * FIXME:
- * It is not easy to release correctly commands according to their state when 
- * devices are set offline, when the state is neither TIMEOUT nor FAILED.
- * When a device is set offline, we can have some command with
- * rq_status=RQ_SCSY_BUSY, owner=SCSI_STATE_HIGHLEVEL, 
- * state=SCSI_STATE_INITIALIZING and the driver module cannot be released.
- * (DB, 17 May 1998)
- */
-               }
-       }
-
-       /*
-        * Next, see if we need to request sense information.  if so,
-        * then get it now, so we have a better idea of what to do.
-        * FIXME(eric) this has the unfortunate side effect that if a host
-        * adapter does not automatically request sense information, that we end
-        * up shutting it down before we request it.  All hosts should be doing this
-        * anyways, so for now all I have to say is tough noogies if you end up in here.
-        * On second thought, this is probably a good idea.  We *really* want to give
-        * authors an incentive to automatically request this.
-        */
-       SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we need to request sense\n"));
-
-       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
-               for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
-                       if (SCpnt->state != SCSI_STATE_FAILED || scsi_sense_valid(SCpnt)) {
-                               continue;
-                       }
-                       SCSI_LOG_ERROR_RECOVERY(2, printk("scsi_unjam_host: Requesting sense for %d\n",
-                                                         SCpnt->target));
-                       rtn = scsi_request_sense(SCpnt);
-                       if (rtn != SUCCESS) {
-                               continue;
-                       }
-                       SCSI_LOG_ERROR_RECOVERY(3, printk("Sense requested for %p - result %x\n",
-                                                 SCpnt, SCpnt->result));
-                       SCSI_LOG_ERROR_RECOVERY(3, print_sense("bh", SCpnt));
-
-                       result = scsi_decide_disposition(SCpnt);
-
-                       /*
-                        * If the result was normal, then just pass it along to the
-                        * upper level.
-                        */
-                       if (result == SUCCESS) {
-                               SCpnt->host->host_failed--;
-                               scsi_eh_finish_command(&SCdone, SCpnt);
-                       }
-                       if (result != NEEDS_RETRY) {
-                               continue;
-                       }
-                       /* 
-                        * We only come in here if we want to retry a
-                        * command.  The test to see whether the command
-                        * should be retried should be keeping track of the
-                        * number of tries, so we don't end up looping, of
-                        * course.  
-                        */
-                       SCpnt->state = NEEDS_RETRY;
-                       rtn = scsi_eh_retry_command(SCpnt);
-                       if (rtn != SUCCESS) {
-                               continue;
-                       }
-                       /*
-                        * We eventually hand this one back to the top level.
-                        */
-                       SCpnt->host->host_failed--;
-                       scsi_eh_finish_command(&SCdone, SCpnt);
-               }
-       }
-
-       /*
-        * Go through the list of commands and figure out where we stand and how bad things
-        * really are.
-        */
-       numfailed = 0;
-       timed_out = 0;
-       devices_failed = 0;
-       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
-               unsigned int device_error = 0;
-
-               for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
-                       if (SCpnt->state == SCSI_STATE_FAILED) {
-                               SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d failed\n",
-                                                        SCpnt->target));
-                               numfailed++;
-                               device_error++;
-                       }
-                       if (SCpnt->state == SCSI_STATE_TIMEOUT) {
-                               SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d timedout\n",
-                                                        SCpnt->target));
-                               timed_out++;
-                               device_error++;
-                       }
-               }
-               if (device_error > 0) {
-                       devices_failed++;
-               }
-       }
-
-       SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d+%d commands on %d devices require eh work\n",
-                                 numfailed, timed_out, devices_failed));
-
-       if (host->host_failed == 0) {
-               ourrtn = TRUE;
-               goto leave;
-       }
-       /*
-        * Next, try and see whether or not it makes sense to try and abort
-        * the running command.  This only works out to be the case if we have
-        * one command that has timed out.  If the command simply failed, it
-        * makes no sense to try and abort the command, since as far as the
-        * host adapter is concerned, it isn't running.
-        */
-
-       SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try abort\n"));
-
-       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
-               for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
-                       if (SCloop->state != SCSI_STATE_TIMEOUT) {
-                               continue;
-                       }
-                       rtn = scsi_try_to_abort_command(SCloop, ABORT_TIMEOUT);
-                       if (rtn == SUCCESS) {
-                               rtn = scsi_test_unit_ready(SCloop);
-
-                               if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
-                                       rtn = scsi_eh_retry_command(SCloop);
-
-                                       if (rtn == SUCCESS) {
-                                               SCloop->host->host_failed--;
-                                               scsi_eh_finish_command(&SCdone, SCloop);
-                                       }
-                               }
-                       }
-               }
-       }
-
-       /*
-        * If we have corrected all of the problems, then we are done.
-        */
-       if (host->host_failed == 0) {
-               ourrtn = TRUE;
-               goto leave;
-       }
-       /*
-        * Either the abort wasn't appropriate, or it didn't succeed.
-        * Now try a bus device reset.  Still, look to see whether we have
-        * multiple devices that are jammed or not - if we have multiple devices,
-        * it makes no sense to try BUS_DEVICE_RESET - we really would need
-        * to try a BUS_RESET instead.
-        *
-        * Does this make sense - should we try BDR on each device individually?
-        * Yes, definitely.
-        */
-       SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try BDR\n"));
-
-       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
-               for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
-                       if (SCloop->state == SCSI_STATE_FAILED
-                           || SCloop->state == SCSI_STATE_TIMEOUT) {
-                               break;
-                       }
-               }
-
-               if (SCloop == NULL) {
-                       continue;
-               }
-               /*
-                * OK, we have a device that is having problems.  Try and send
-                * a bus device reset to it.
-                *
-                * FIXME(eric) - make sure we handle the case where multiple
-                * commands to the same device have failed. They all must
-                * get properly restarted.
-                */
-               rtn = scsi_try_bus_device_reset(SCloop, RESET_TIMEOUT);
-
-               if (rtn == SUCCESS) {
-                       rtn = scsi_test_unit_ready(SCloop);
-
-                       if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
-                               rtn = scsi_eh_retry_command(SCloop);
-
-                               if (rtn == SUCCESS) {
-                                       SCloop->host->host_failed--;
-                                       scsi_eh_finish_command(&SCdone, SCloop);
-                               }
-                       }
-               }
-       }
-
-       if (host->host_failed == 0) {
-               ourrtn = TRUE;
-               goto leave;
-       }
-       /*
-        * If we ended up here, we have serious problems.  The only thing left
-        * to try is a full bus reset.  If someone has grabbed the bus and isn't
-        * letting go, then perhaps this will help.
-        */
-       SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard bus reset\n"));
-
-       /* 
-        * We really want to loop over the various channels, and do this on
-        * a channel by channel basis.  We should also check to see if any
-        * of the failed commands are on soft_reset devices, and if so, skip
-        * the reset.  
-        */
-       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
-             next_device:
-               for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
-                       if (SCpnt->state != SCSI_STATE_FAILED
-                           && SCpnt->state != SCSI_STATE_TIMEOUT) {
-                               continue;
-                       }
-                       /*
-                        * We have a failed command.  Make sure there are no other failed
-                        * commands on the same channel that are timed out and implement a
-                        * soft reset.
-                        */
-                       for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
-                               for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
-                                       if (SCloop->channel != SCpnt->channel) {
-                                               continue;
-                                       }
-                                       if (SCloop->state != SCSI_STATE_FAILED
-                                           && SCloop->state != SCSI_STATE_TIMEOUT) {
-                                               continue;
-                                       }
-                                       if (SDloop->soft_reset && SCloop->state == SCSI_STATE_TIMEOUT) {
-                                               /* 
-                                                * If this device uses the soft reset option, and this
-                                                * is one of the devices acting up, then our only
-                                                * option is to wait a bit, since the command is
-                                                * supposedly still running.  
-                                                *
-                                                * FIXME(eric) - right now we will just end up falling
-                                                * through to the 'take device offline' case.
-                                                *
-                                                * FIXME(eric) - It is possible that the command completed
-                                                * *after* the error recovery procedure started, and if this
-                                                * is the case, we are worrying about nothing here.
-                                                */
-
-                                               scsi_sleep(1 * HZ);
-                                               goto next_device;
-                                       }
-                               }
-                       }
-
-                       /*
-                        * We now know that we are able to perform a reset for the
-                        * bus that SCpnt points to.  There are no soft-reset devices
-                        * with outstanding timed out commands.
-                        */
-                       rtn = scsi_try_bus_reset(SCpnt);
-                       if (rtn == SUCCESS) {
-                               for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
-                                       for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
-                                               if (SCloop->channel != SCpnt->channel) {
-                                                       continue;
-                                               }
-                                               if (SCloop->state != SCSI_STATE_FAILED
-                                                   && SCloop->state != SCSI_STATE_TIMEOUT) {
-                                                       continue;
-                                               }
-                                               rtn = scsi_test_unit_ready(SCloop);
-
-                                               if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
-                                                       rtn = scsi_eh_retry_command(SCloop);
-
-                                                       if (rtn == SUCCESS) {
-                                                               SCpnt->host->host_failed--;
-                                                               scsi_eh_finish_command(&SCdone, SCloop);
-                                                       }
-                                               }
-                                               /*
-                                                * If the bus reset worked, but we are still unable to
-                                                * talk to the device, take it offline.
-                                                * FIXME(eric) - is this really the correct thing to do?
-                                                */
-                                               if (rtn != SUCCESS) {
-                                                       printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after bus reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
-
-                                                       SDloop->online = FALSE;
-                                                       SDloop->host->host_failed--;
-                                                       scsi_eh_finish_command(&SCdone, SCloop);
-                                               }
-                                       }
-                               }
-                       }
-               }
-       }
-
-       if (host->host_failed == 0) {
-               ourrtn = TRUE;
-               goto leave;
-       }
-       /*
-        * If we ended up here, we have serious problems.  The only thing left
-        * to try is a full host reset - perhaps the firmware on the device
-        * crashed, or something like that.
-        *
-        * It is assumed that a succesful host reset will cause *all* information
-        * about the command to be flushed from both the host adapter *and* the
-        * device.
-        *
-        * FIXME(eric) - it isn't clear that devices that implement the soft reset
-        * option can ever be cleared except via cycling the power.  The problem is
-        * that sending the host reset command will cause the host to forget
-        * about the pending command, but the device won't forget.  For now, we
-        * skip the host reset option if any of the failed devices are configured
-        * to use the soft reset option.
-        */
-       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
-             next_device2:
-               for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
-                       if (SCpnt->state != SCSI_STATE_FAILED
-                           && SCpnt->state != SCSI_STATE_TIMEOUT) {
-                               continue;
-                       }
-                       if (SDpnt->soft_reset && SCpnt->state == SCSI_STATE_TIMEOUT) {
-                               /* 
-                                * If this device uses the soft reset option, and this
-                                * is one of the devices acting up, then our only
-                                * option is to wait a bit, since the command is
-                                * supposedly still running.  
-                                *
-                                * FIXME(eric) - right now we will just end up falling
-                                * through to the 'take device offline' case.
-                                */
-                               SCSI_LOG_ERROR_RECOVERY(3,
-                                                       printk("scsi_unjam_host: Unable to try hard host reset\n"));
-
-                               /*
-                                * Due to the spinlock, we will never get out of this
-                                * loop without a proper wait. (DB)
-                                */
-                               scsi_sleep(1 * HZ);
-
-                               goto next_device2;
-                       }
-                       SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard host reset\n"));
-
-                       /*
-                        * FIXME(eric) - we need to obtain a valid SCpnt to perform this call.
-                        */
-                       rtn = scsi_try_host_reset(SCpnt);
-                       if (rtn == SUCCESS) {
-                               /*
-                                * FIXME(eric) we assume that all commands are flushed from the
-                                * controller.  We should get a DID_RESET for all of the commands
-                                * that were pending.  We should ignore these so that we can
-                                * guarantee that we are in a consistent state.
-                                *
-                                * I believe this to be the case right now, but this needs to be
-                                * tested.
-                                */
-                               for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
-                                       for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
-                                               if (SCloop->state != SCSI_STATE_FAILED
-                                                   && SCloop->state != SCSI_STATE_TIMEOUT) {
-                                                       continue;
-                                               }
-                                               rtn = scsi_test_unit_ready(SCloop);
-
-                                               if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
-                                                       rtn = scsi_eh_retry_command(SCloop);
-
-                                                       if (rtn == SUCCESS) {
-                                                               SCpnt->host->host_failed--;
-                                                               scsi_eh_finish_command(&SCdone, SCloop);
-                                                       }
-                                               }
-                                               if (rtn != SUCCESS) {
-                                                       printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after host reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
-                                                       SDloop->online = FALSE;
-                                                       SDloop->host->host_failed--;
-                                                       scsi_eh_finish_command(&SCdone, SCloop);
-                                               }
-                                       }
-                               }
-                       }
-               }
-       }
+       Scsi_Cmnd *sc_todo = NULL;
+       Scsi_Cmnd *scmd;
  
         /*
-        * If we solved all of the problems, then let's rev up the engines again.
-        */
-       if (host->host_failed == 0) {
-               ourrtn = TRUE;
-               goto leave;
-       }
-       /*
-        * If the HOST RESET failed, then for now we assume that the entire host
-        * adapter is too hosed to be of any use.  For our purposes, however, it is
-        * easier to simply take the devices offline that correspond to commands
-        * that failed.
+        * Is this assert really ok anymore (andmike)? Should we at least
+        * be using spin_lock_unlocked?
          */
-       SCSI_LOG_ERROR_RECOVERY(1, printk("scsi_unjam_host: Take device offline\n"));
-
-       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
-               for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
-                       if (SCloop->state == SCSI_STATE_FAILED || SCloop->state == SCSI_STATE_TIMEOUT) {
-                               SDloop = SCloop->device;
-                               if (SDloop->online == TRUE) {
-                                       printk(KERN_INFO "scsi: device set offline - command error recover failed: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
-                                       SDloop->online = FALSE;
-                               }
-
-                               /*
-                                * This should pass the failure up to the top level driver, and
-                                * it will have to try and do something intelligent with it.
-                                */
-                               SCloop->host->host_failed--;
-
-                               if (SCloop->state == SCSI_STATE_TIMEOUT) {
-                                       SCloop->result |= (DRIVER_TIMEOUT << 24);
-                               }
-                               SCSI_LOG_ERROR_RECOVERY(3, printk("Finishing command for device %d %x\n",
-                                   SDloop->id, SCloop->result));
+       ASSERT_LOCK(shost->host_lock, 0);
  
-                               scsi_eh_finish_command(&SCdone, SCloop);
-                       }
-               }
-       }
+       scsi_eh_get_failed(&sc_todo, shost);
  
-       if (host->host_failed != 0) {
-               panic("scsi_unjam_host: Miscount of number of failed commands.\n");
-       }
-       SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Returning\n"));
+       if (scsi_eh_get_sense(sc_todo, shost))
+               if (scsi_eh_abort_cmd(sc_todo, shost))
+                       if (scsi_eh_bus_device_reset(sc_todo, shost))
+                               if(scsi_eh_bus_host_reset(sc_todo, shost))
+                                       scsi_eh_offline_sdevs(sc_todo, shost);
  
-       ourrtn = FALSE;
+       BUG_ON(shost->host_failed);
  
-      leave:
  
         /*
-        * We should have a list of commands that we 'finished' during the course of
-        * error recovery.  This should be the same as the list of commands that timed out
-        * or failed.  We are currently holding these things in a linked list - we didn't
-        * put them in the bottom half queue because we wanted to keep things quiet while
-        * we were working on recovery, and passing them up to the top level could easily
-        * cause the top level to try and queue something else again.
+        * We are currently holding these things in a linked list - we
+        * didn't put them in the bottom half queue because we wanted to
+        * keep things quiet while we were working on recovery, and
+        * passing them up to the top level could easily cause the top
+        * level to try and queue something else again.
          *
-        * Start by marking that the host is no longer in error recovery.
+        * Start by marking that the host is no longer in error recovery.
          */
-       host->in_recovery = 0;
+       shost->in_recovery = 0;
  
         /*
-        * Take the list of commands, and stick them in the bottom half queue.
-        * The current implementation of scsi_done will do this for us - if need
+        * Take the list of commands, and stick them in the bottom half queue.
+        * The current implementation of scsi_done will do this for us - if need
          * be we can create a special version of this function to do the
          * same job for us.
          */
-       for (SCpnt = SCdone; SCpnt != NULL; SCpnt = SCdone) {
-               SCdone = SCpnt->bh_next;
-               SCpnt->bh_next = NULL;
-                /*
-                 * Oh, this is a vile hack.  scsi_done() expects a timer
-                 * to be running on the command.  If there isn't, it assumes
-                 * that the command has actually timed out, and a timer
-                 * handler is running.  That may well be how we got into
-                 * this fix, but right now things are stable.  We add
-                 * a timer back again so that we can report completion.
-                 * scsi_done() will immediately remove said timer from
-                 * the command, and then process it.
-                 */
-               scsi_add_timer(SCpnt, 100, scsi_eh_times_out);
-               scsi_done(SCpnt);
+       for (scmd = sc_todo; scmd; scmd = sc_todo) {
+               sc_todo = scmd->bh_next;
+               scmd->bh_next = NULL;
+               /*
+                * Oh, this is a vile hack.  scsi_done() expects a timer
+                * to be running on the command.  If there isn't, it assumes
+                * that the command has actually timed out, and a timer
+                * handler is running.  That may well be how we got into
+                * this fix, but right now things are stable.  We add
+                * a timer back again so that we can report completion.
+                * scsi_done() will immediately remove said timer from
+                * the command, and then process it.
+                */
+               scsi_add_timer(scmd, 100, scsi_eh_times_out);
+               scsi_done(scmd);
         }
  
-       return (ourrtn);
  }
  
-
-/*
- * Function:  scsi_error_handler
- *
- * Purpose:     Handle errors/timeouts of scsi commands, try and clean up
- *              and unjam the bus, and restart things.
- *
- * Arguments:   host    - host for which we are running.
- *
- * Returns:     Never returns.
- *
- * Notes:       This is always run in the context of a kernel thread.  The
- *              idea is that we start this thing up when the kernel starts
- *              up (one per host that we detect), and it immediately goes to
- *              sleep and waits for some event (i.e. failure).  When this
- *              takes place, we have the job of trying to unjam the bus
- *              and restarting things.
+/**
+ * scsi_error_handler - Handle errors/timeouts of SCSI cmds.
+ * @data:      Host for which we are running.
   *
- */
+ * Notes:
+ *    This is always run in the context of a kernel thread.  The idea is
+ *    that we start this thing up when the kernel starts up (one per host
+ *    that we detect), and it immediately goes to sleep and waits for some
+ *    event (i.e. failure).  When this takes place, we have the job of
+ *    trying to unjam the bus and restarting things.
+ **/
  void scsi_error_handler(void *data)
  {
-       struct Scsi_Host *host = (struct Scsi_Host *) data;
+       struct Scsi_Host *shost = (struct Scsi_Host *) data;
         int rtn;
         DECLARE_MUTEX_LOCKED(sem);
  
-        /*
-         * We only listen to signals if the HA was loaded as a module.
-         * If the HA was compiled into the kernel, then we don't listen
-         * to any signals.
-         */
+       /*
+        * We only listen to signals if the HA was loaded as a module.
+        * If the HA was compiled into the kernel, then we don't listen
+        * to any signals.
+        */
         siginitsetinv(&current->blocked, SHUTDOWN_SIGS);
  
         lock_kernel();
@@ -1883,19 +1548,20 @@ void scsi_error_handler(void *data)
          * Set the name of this process.
          */
  
-       sprintf(current->comm, "scsi_eh_%d", host->host_no);
+       sprintf(current->comm, "scsi_eh_%d", shost->host_no);
  
-       host->eh_wait = &sem;
-       host->ehandler = current;
+       shost->eh_wait = &sem;
+       shost->ehandler = current;
  
         unlock_kernel();
  
         /*
          * Wake up the thread that created us.
          */
-       SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent %d\n", host->eh_notify->count.counter));
+       SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent %d\n",
+                                         shost->eh_notify->count.counter));
  
-       up(host->eh_notify);
+       up(shost->eh_notify);
  
         while (1) {
                 /*
@@ -1920,20 +1586,20 @@ void scsi_error_handler(void *data)
  
                 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler waking up\n"));
  
-               host->eh_active = 1;
+               shost->eh_active = 1;
  
                 /*
                  * We have a host that is failing for some reason.  Figure out
                  * what we need to do to get it up and online again (if we can).
                  * If we fail, we end up taking the thing offline.
                  */
-               if (host->hostt->eh_strategy_handler != NULL) {
-                       rtn = host->hostt->eh_strategy_handler(host);
+               if (shost->hostt->eh_strategy_handler != NULL) {
+                       rtn = shost->hostt->eh_strategy_handler(shost);
                 } else {
-                       rtn = scsi_unjam_host(host);
+                       scsi_unjam_host(shost);
                 }
  
-               host->eh_active = 0;
+               shost->eh_active = 0;
  
                 /*
                  * Note - if the above fails completely, the action is to take
@@ -1942,7 +1608,7 @@ void scsi_error_handler(void *data)
                  * restart, we restart any I/O to any other devices on the bus
                  * which are still online.
                  */
-               scsi_restart_operations(host);
+               scsi_restart_operations(shost);
  
         }
  
@@ -1951,7 +1617,7 @@ void scsi_error_handler(void *data)
         /*
          * Make sure that nobody tries to wake us up again.
          */
-       host->eh_wait = NULL;
+       shost->eh_wait = NULL;
  
         /*
          * Knock this down too.  From this point on, the host is flying
@@ -1959,9 +1625,9 @@ void scsi_error_handler(void *data)
          * that's fine.  If the user sent a signal to this thing, we are
          * potentially in real danger.
          */
-       host->in_recovery = 0;
-       host->eh_active = 0;
-       host->ehandler = NULL;
+       shost->in_recovery = 0;
+       shost->eh_active = 0;
+       shost->ehandler = NULL;
  
         /*
          * If anyone is waiting for us to exit (i.e. someone trying to unload
@@ -1971,41 +1637,39 @@ void scsi_error_handler(void *data)
          * the error handling thread wakes up that it would just exit without
          * needing to touch any memory associated with the driver itself.
          */
-       if (host->eh_notify != NULL)
-               up(host->eh_notify);
+       if (shost->eh_notify != NULL)
+               up(shost->eh_notify);
  }
  
-/*
- * Function:   scsi_new_reset
- *
- * Purpose:    Send requested reset to a bus or device at any phase.
+/**
+ * scsi_new_reset - Send reset to a bus or device at any phase.
+ * @scmd:      Cmd to send reset with (usually a dummy)
+ * @flag:      Reset type.
   *
- * Arguments:  SCpnt   - command ptr to send reset with (usually a dummy)
- *             flag - reset type (see scsi.h)
+ * Description:
+ *    This is used by the SCSI Generic driver to provide Bus/Device reset
+ *    capability.
   *
- * Returns:    SUCCESS/FAILURE.
- *
- * Notes:      This is used by the SCSI Generic driver to provide
- *             Bus/Device reset capability.
- */
-int
-scsi_new_reset(Scsi_Cmnd *SCpnt, int flag)
+ * Return value:
+ *    SUCCESS/FAILED.
+ **/
+int scsi_new_reset(Scsi_Cmnd *scmd, int flag)
  {
         int rtn;
  
         switch(flag) {
         case SCSI_TRY_RESET_DEVICE:
-               rtn = scsi_try_bus_device_reset(SCpnt, 0);
+               rtn = scsi_try_bus_device_reset(scmd);
                 if (rtn == SUCCESS)
                         break;
                 /* FALLTHROUGH */
         case SCSI_TRY_RESET_BUS:
-               rtn = scsi_try_bus_reset(SCpnt);
+               rtn = scsi_try_bus_reset(scmd);
                 if (rtn == SUCCESS)
                         break;
                 /* FALLTHROUGH */
         case SCSI_TRY_RESET_HOST:
-               rtn = scsi_try_host_reset(SCpnt);
+               rtn = scsi_try_host_reset(scmd);
                 break;
         default:
                 rtn = FAILED;
author	Mike Anderson <andmike@us.ibm.com>
	Mon, 30 Sep 2002 13:43:45 +0000 (09:43 -0400)
committer	James Bottomley <jejb@mulgrave.(none)>
	Mon, 30 Sep 2002 13:43:45 +0000 (09:43 -0400)
drivers/scsi/hosts.c		patch \| blob \| history
drivers/scsi/hosts.h		patch \| blob \| history
drivers/scsi/scsi.c		patch \| blob \| history
drivers/scsi/scsi.h		patch \| blob \| history
drivers/scsi/scsi_error.c		patch \| blob \| history