git.hungrycats.org Git - linux/commitdiff
zygo: btrfs: don't let btrfs_recover_relocation get stuck waiting for cleaner_kthread...
author: Zygo Blaxell <zblaxell@thirteen.furryterror.org>
Sun, 1 May 2016 04:03:14 +0000 (00:03 -0400)
committer: Zygo Blaxell <zblaxell@thirteen.furryterror.org>
Sun, 1 May 2016 04:03:14 +0000 (00:03 -0400)
Mounting a filesystem with large deleted snapshots gets stuck at
open_ctree + 0x212b, which is the mutex_lock here:

mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(tree_root);
mutex_unlock(&fs_info->cleaner_mutex);

The cleaner_kthread is holding cleaner_mutex for the entire time
it deletes one snapshot, which blocks the mount from proceeding
to completion.

It also seems insane to run cleaner_kthread *before* we've recovered
relocation, but I don't know precisely what btrfs_recover_relocation
does.

I also don't know if there are ordering dependencies between
cleaner_kthread and transaction_kthread; however, I do know that
one can wait indefinitely for the other.  I keep the same startup
order but make sure that btrfs_recover_relocation wins the race
for cleaner_mutex by cheating.

fs/btrfs/disk-io.c

index 07c1ad6444d5cdf16e78a7beae02b05db79c0c8b..af5ea1dd8f48aeed10129e3c2ab52291549fb33b 100644 (file)
@@ -2927,6 +2927,10 @@ retry_root_backup:
                        "too many missing devices, writeable mount should not be allowed\n");
        }
 
+       /* Take cleaner_mutex here, before cleaner_kthread is started, so
+        * that we don't block on it at btrfs_recover_relocation later on.
+        * cleaner_kthread blocks on us instead. */
+       mutex_lock(&fs_info->cleaner_mutex);
        fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
                                               "btrfs-cleaner");
        if (IS_ERR(fs_info->cleaner_kthread))
@@ -2986,9 +2990,8 @@ retry_root_backup:
                if (ret)
                        goto fail_qgroup;
 
-               mutex_lock(&fs_info->cleaner_mutex);
+               /* We grabbed this mutex before we created the cleaner_kthread */
                ret = btrfs_recover_relocation(tree_root);
-               mutex_unlock(&fs_info->cleaner_mutex);
                if (ret < 0) {
                        printk(KERN_WARNING
                               "BTRFS: failed to recover relocation\n");
@@ -2996,6 +2999,7 @@ retry_root_backup:
                        goto fail_qgroup;
                }
        }
+       mutex_unlock(&fs_info->cleaner_mutex);
 
        location.objectid = BTRFS_FS_TREE_OBJECTID;
        location.type = BTRFS_ROOT_ITEM_KEY;
@@ -3079,6 +3083,7 @@ fail_cleaner:
        filemap_write_and_wait(fs_info->btree_inode->i_mapping);
 
 fail_sysfs:
+       mutex_unlock(&fs_info->cleaner_mutex);
        btrfs_sysfs_remove_one(fs_info);
 
 fail_block_groups: