]> git.hungrycats.org Git - linux/commitdiff
scsi: sd: Fix TCG OPAL unlock on system resume
authorDamien Le Moal <dlemoal@kernel.org>
Tue, 19 Mar 2024 07:12:09 +0000 (16:12 +0900)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 3 Apr 2024 13:11:54 +0000 (15:11 +0200)
commit 0c76106cb97548810214def8ee22700bbbb90543 upstream.

Commit 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop
management") introduced the manage_system_start_stop scsi_device flag to
allow libata to indicate to the SCSI disk driver that nothing should be
done when resuming a disk on system resume. This change turned the
execution of sd_resume() into a no-op for ATA devices on system
resume. While this solved deadlock issues during device resume, this change
also wrongly removed the execution of opal_unlock_from_suspend().  As a
result, devices with TCG OPAL locking enabled remain locked and
inaccessible after a system resume from sleep.

To fix this issue, introduce the SCSI driver resume method and implement it
with the sd_resume() function calling opal_unlock_from_suspend(). The
former sd_resume() function is renamed to sd_resume_common() and modified
to call the new sd_resume() function. For non-ATA devices, this result in
no functional changes.

In order for libata to explicitly execute sd_resume() when a device is
resumed during system restart, the function scsi_resume_device() is
introduced. libata calls this function from the revalidation work executed
on devie resume, a state that is indicated with the new device flag
ATA_DFLAG_RESUMING. Doing so, locked TCG OPAL enabled devices are unlocked
on resume, allowing normal operation.

Fixes: 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218538
Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/r/20240319071209.1179257-1-dlemoal@kernel.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/ata/libata-eh.c
drivers/ata/libata-scsi.c
drivers/scsi/scsi_scan.c
drivers/scsi/sd.c
include/linux/libata.h
include/scsi/scsi_driver.h
include/scsi/scsi_host.h

index b0d6e69c4a5b2e0fd66c19804aed7c9c46ab9c21..214b935c2ced796fbcb7bbf70a8b8bee27fd9e3c 100644 (file)
@@ -712,8 +712,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
                                ehc->saved_ncq_enabled |= 1 << devno;
 
                        /* If we are resuming, wake up the device */
-                       if (ap->pflags & ATA_PFLAG_RESUMING)
+                       if (ap->pflags & ATA_PFLAG_RESUMING) {
+                               dev->flags |= ATA_DFLAG_RESUMING;
                                ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
+                       }
                }
        }
 
@@ -3169,6 +3171,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
        return 0;
 
  err:
+       dev->flags &= ~ATA_DFLAG_RESUMING;
        *r_failed_dev = dev;
        return rc;
 }
index 0a0f483124c3a56af47abb498e67193236416b18..2f4c58837641077f3ad91974cd11affbe6dcd1e8 100644 (file)
@@ -4730,6 +4730,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
        struct ata_link *link;
        struct ata_device *dev;
        unsigned long flags;
+       bool do_resume;
        int ret = 0;
 
        mutex_lock(&ap->scsi_scan_mutex);
@@ -4751,7 +4752,15 @@ void ata_scsi_dev_rescan(struct work_struct *work)
                        if (scsi_device_get(sdev))
                                continue;
 
+                       do_resume = dev->flags & ATA_DFLAG_RESUMING;
+
                        spin_unlock_irqrestore(ap->lock, flags);
+                       if (do_resume) {
+                               ret = scsi_resume_device(sdev);
+                               if (ret == -EWOULDBLOCK)
+                                       goto unlock;
+                               dev->flags &= ~ATA_DFLAG_RESUMING;
+                       }
                        ret = scsi_rescan_device(sdev);
                        scsi_device_put(sdev);
                        spin_lock_irqsave(ap->lock, flags);
index 44680f65ea1455daec91d4c6dd98d2cb2431f4e2..ca99be7341d9be49fd19e8a5103ece997cac5d91 100644 (file)
@@ -1619,6 +1619,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel,
 }
 EXPORT_SYMBOL(scsi_add_device);
 
+int scsi_resume_device(struct scsi_device *sdev)
+{
+       struct device *dev = &sdev->sdev_gendev;
+       int ret = 0;
+
+       device_lock(dev);
+
+       /*
+        * Bail out if the device or its queue are not running. Otherwise,
+        * the rescan may block waiting for commands to be executed, with us
+        * holding the device lock. This can result in a potential deadlock
+        * in the power management core code when system resume is on-going.
+        */
+       if (sdev->sdev_state != SDEV_RUNNING ||
+           blk_queue_pm_only(sdev->request_queue)) {
+               ret = -EWOULDBLOCK;
+               goto unlock;
+       }
+
+       if (dev->driver && try_module_get(dev->driver->owner)) {
+               struct scsi_driver *drv = to_scsi_driver(dev->driver);
+
+               if (drv->resume)
+                       ret = drv->resume(dev);
+               module_put(dev->driver->owner);
+       }
+
+unlock:
+       device_unlock(dev);
+
+       return ret;
+}
+EXPORT_SYMBOL(scsi_resume_device);
+
 int scsi_rescan_device(struct scsi_device *sdev)
 {
        struct device *dev = &sdev->sdev_gendev;
index a12ff43ac8fd0e4d9257ca10e43852d630dab6d2..f76dbeade0c4e71c22eb3e725c420804add809e5 100644 (file)
@@ -3948,7 +3948,21 @@ static int sd_suspend_runtime(struct device *dev)
        return sd_suspend_common(dev, true);
 }
 
-static int sd_resume(struct device *dev, bool runtime)
+static int sd_resume(struct device *dev)
+{
+       struct scsi_disk *sdkp = dev_get_drvdata(dev);
+
+       sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+
+       if (opal_unlock_from_suspend(sdkp->opal_dev)) {
+               sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n");
+               return -EIO;
+       }
+
+       return 0;
+}
+
+static int sd_resume_common(struct device *dev, bool runtime)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
        int ret;
@@ -3964,7 +3978,7 @@ static int sd_resume(struct device *dev, bool runtime)
        sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
        ret = sd_start_stop_device(sdkp, 1);
        if (!ret) {
-               opal_unlock_from_suspend(sdkp->opal_dev);
+               sd_resume(dev);
                sdkp->suspended = false;
        }
 
@@ -3983,7 +3997,7 @@ static int sd_resume_system(struct device *dev)
                return 0;
        }
 
-       return sd_resume(dev, false);
+       return sd_resume_common(dev, false);
 }
 
 static int sd_resume_runtime(struct device *dev)
@@ -4010,7 +4024,7 @@ static int sd_resume_runtime(struct device *dev)
                                  "Failed to clear sense data\n");
        }
 
-       return sd_resume(dev, true);
+       return sd_resume_common(dev, true);
 }
 
 static const struct dev_pm_ops sd_pm_ops = {
@@ -4033,6 +4047,7 @@ static struct scsi_driver sd_template = {
                .pm             = &sd_pm_ops,
        },
        .rescan                 = sd_rescan,
+       .resume                 = sd_resume,
        .init_command           = sd_init_command,
        .uninit_command         = sd_uninit_command,
        .done                   = sd_done,
index 1dbb14daccfaf326af0c54c89ff61afb50e07982..1d2afcd47523d45410aaa865f45af86f7ca9bacd 100644 (file)
@@ -107,6 +107,7 @@ enum {
 
        ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */
        ATA_DFLAG_CDL_ENABLED   = (1 << 21), /* cmd duration limits is enabled */
+       ATA_DFLAG_RESUMING      = (1 << 22),  /* Device is resuming */
        ATA_DFLAG_DETACH        = (1 << 24),
        ATA_DFLAG_DETACHED      = (1 << 25),
        ATA_DFLAG_DA            = (1 << 26), /* device supports Device Attention */
index 4ce1988b2ba01c6a92a09f998346ea4c0ea29b19..f40915d2eceef44b46bb209270a878906255c998 100644 (file)
@@ -12,6 +12,7 @@ struct request;
 struct scsi_driver {
        struct device_driver    gendrv;
 
+       int (*resume)(struct device *);
        void (*rescan)(struct device *);
        blk_status_t (*init_command)(struct scsi_cmnd *);
        void (*uninit_command)(struct scsi_cmnd *);
index 3b907fc2ef08fe7e668b92d60236e11ece0c6df5..510f594b0636898c17c86933044b5fd67c03e733 100644 (file)
@@ -767,6 +767,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht);
 #define scsi_template_proc_dir(sht) NULL
 #endif
 extern void scsi_scan_host(struct Scsi_Host *);
+extern int scsi_resume_device(struct scsi_device *sdev);
 extern int scsi_rescan_device(struct scsi_device *sdev);
 extern void scsi_remove_host(struct Scsi_Host *);
 extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);