net/mlx5: Cancel delayed recovery work when unloading the driver
author	Mohamad Haj Yahia <mohamad@mellanox.com>
	Thu, 30 Mar 2017 14:09:00 +0000 (17:09 +0300)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 21 Jul 2017 05:42:18 +0000 (07:42 +0200)
commit 2a0165a034ac024b60cca49c61e46f4afa2e4d98 upstream.

Draining the health workqueue will ignore future health work, including
the work that reports a hardware failure, and thus we can't enter the
error state. Instead, cancel the recovery flow and make sure that only
the recovery flow won't be scheduled.

Fixes: 5e44fca50470 ('net/mlx5: Only cancel recovery work when cleaning up device')
Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
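
The fix relies on a suppress-then-cancel idiom: a drop bit is set under the
same spinlock that the work handler takes before (re)scheduling recovery, and
only then is the pending work cancelled synchronously. The sketch below
illustrates that idiom under simplified assumptions: struct health_ctx, the
function names, and the delay value are hypothetical stand-ins for their
mlx5_core_health counterparts, while the flag names mirror the bits used in
the diff.

/*
 * Minimal sketch of the suppress-then-cancel idiom this fix relies on.
 * "struct health_ctx" and the function names are hypothetical stand-ins
 * for their mlx5_core_health counterparts; only the flag names mirror
 * the bits used in the diff below.
 */
#include <linux/bitops.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

enum {
	DROP_NEW_HEALTH_WORK,	/* suppress all new health work        */
	DROP_NEW_RECOVERY_WORK,	/* suppress only new recovery attempts */
};

struct health_ctx {
	spinlock_t		wq_lock;	/* guards the drop-flag checks */
	unsigned long		flags;
	struct work_struct	health_work;
	struct delayed_work	recover_work;
};

static void recover_fn(struct work_struct *work)
{
	/* ... reset/recover the device ... */
}

/* Health handler: schedules delayed recovery unless it was suppressed. */
static void health_fn(struct work_struct *work)
{
	struct health_ctx *h = container_of(work, struct health_ctx,
					    health_work);

	/* ... handle the bad device state ... */

	spin_lock(&h->wq_lock);
	if (!test_bit(DROP_NEW_RECOVERY_WORK, &h->flags))
		schedule_delayed_work(&h->recover_work,
				      msecs_to_jiffies(60 * 1000));
	spin_unlock(&h->wq_lock);
}

static void health_ctx_init(struct health_ctx *h)
{
	spin_lock_init(&h->wq_lock);
	h->flags = 0;
	INIT_WORK(&h->health_work, health_fn);
	INIT_DELAYED_WORK(&h->recover_work, recover_fn);
}

/* Entry point (driven by a poll timer in the mlx5 driver): queue health work. */
static void report_health_event(struct health_ctx *h)
{
	spin_lock(&h->wq_lock);
	if (!test_bit(DROP_NEW_HEALTH_WORK, &h->flags))
		schedule_work(&h->health_work);
	spin_unlock(&h->wq_lock);
}

/* Full drain: no new health *or* recovery work once this returns. */
static void drain_all(struct health_ctx *h)
{
	spin_lock(&h->wq_lock);
	set_bit(DROP_NEW_HEALTH_WORK, &h->flags);
	set_bit(DROP_NEW_RECOVERY_WORK, &h->flags);
	spin_unlock(&h->wq_lock);
	cancel_delayed_work_sync(&h->recover_work);
	cancel_work_sync(&h->health_work);
}

/*
 * Recovery-only drain, as introduced by this patch: health work keeps
 * running, but recovery can no longer be (re)scheduled once the bit is
 * set and the synchronous cancel has returned.
 */
static void drain_recovery(struct health_ctx *h)
{
	spin_lock(&h->wq_lock);
	set_bit(DROP_NEW_RECOVERY_WORK, &h->flags);
	spin_unlock(&h->wq_lock);
	cancel_delayed_work_sync(&h->recover_work);
}

Setting the bit while holding wq_lock before calling
cancel_delayed_work_sync() closes the race with a handler that is already
past its test_bit() check: any work it managed to re-arm is caught by the
synchronous cancel, and every later pass sees the bit and backs off. Unlike
the full drain, only the recovery bit is set here, so the health work that
reports a hardware failure can still run and move the device into the error
state.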
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
include/linux/mlx5/driver.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 2115c8aacc5b3a6a16f4e3431be1482e7bbe154f..8beecd615a219c2aefd00703a224213b182910b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -67,6 +67,7 @@ enum {
 
 enum {
        MLX5_DROP_NEW_HEALTH_WORK,
+       MLX5_DROP_NEW_RECOVERY_WORK,
 };
 
 static u8 get_nic_state(struct mlx5_core_dev *dev)
@@ -193,7 +194,7 @@ static void health_care(struct work_struct *work)
        mlx5_handle_bad_state(dev);
 
        spin_lock(&health->wq_lock);
-       if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+       if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
                schedule_delayed_work(&health->recover_work, recover_delay);
        else
                dev_err(&dev->pdev->dev,
@@ -328,6 +329,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
        init_timer(&health->timer);
        health->sick = 0;
        clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+       clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
        health->health = &dev->iseg->health;
        health->health_counter = &dev->iseg->health_counter;
 
@@ -350,11 +352,22 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
 
        spin_lock(&health->wq_lock);
        set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+       set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
        spin_unlock(&health->wq_lock);
        cancel_delayed_work_sync(&health->recover_work);
        cancel_work_sync(&health->work);
 }
 
+void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
+{
+       struct mlx5_core_health *health = &dev->priv.health;
+
+       spin_lock(&health->wq_lock);
+       set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+       spin_unlock(&health->wq_lock);
+       cancel_delayed_work_sync(&dev->priv.health.recover_work);
+}
+
 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
 {
        struct mlx5_core_health *health = &dev->priv.health;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 5bea0bf2b48495df6845fd7f0035757cd8d844f0..b3309f2ed7dc435003fd3e448335acb0c615f620 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1169,7 +1169,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        int err = 0;
 
        if (cleanup)
-               mlx5_drain_health_wq(dev);
+               mlx5_drain_health_recovery(dev);
 
        mutex_lock(&dev->intf_state_mutex);
        if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e1a903a5bb3ecfab8caeff3fe71b493a30a4fec6..6a620e01b04090de4a969d8f4c205add0d9cb71d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -788,6 +788,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev);
 void mlx5_start_health_poll(struct mlx5_core_dev *dev);
 void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
 void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
+void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
 int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
                        struct mlx5_buf *buf, int node);
 int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);