]> git.hungrycats.org Git - linux/commitdiff
btrfs: should block unused block groups deletion work when allocating data space
authorWang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Fri, 9 Sep 2016 08:17:48 +0000 (16:17 +0800)
committerZygo Blaxell <zblaxell@thirteen.furryterror.org>
Tue, 1 Nov 2016 01:31:47 +0000 (21:31 -0400)
cleaner_kthread() may run at any time, in which it'll call btrfs_delete_unused_bgs()
to delete unused block groups. Because this work is asynchronous, it may also result
in false ENOSPC error. Please see below race window:

               CPU1                           |             CPU2
                                              |
|-> btrfs_alloc_data_chunk_ondemand()         |-> cleaner_kthread()
    |-> do_chunk_alloc()                      |   |
    |   assume it returns ENOSPC, which means |   |
    |   btrfs_space_info is full and have free|   |
    |   space to satisfy data request.        |   |
    |                                         |   |- > btrfs_delete_unused_bgs()
    |                                         |   |    it will decrease btrfs_space_info
    |                                         |   |    total_bytes and make
    |                                         |   |    btrfs_space_info is not full.
    |                                         |   |
In this case, we may get ENOSPC error, but btrfs_space_info is not full.

To fix this issue, in btrfs_alloc_data_chunk_ondemand(), if we need to call
do_chunk_alloc() to allocating new chunk, we should block btrfs_delete_unused_bgs().
Here we introduce a new struct rw_semaphore bg_delete_sem to do this job.

Indeed there is already a "struct mutex delete_unused_bgs_mutex", but it's mutex,
we can not use it for this purpose. Of course, we can re-define it to be struct
rw_semaphore, then use it in btrfs_alloc_data_chunk_ondemand(). Either method will
work.

But given that delete_unused_bgs_mutex's name length is longer than bg_delete_sem,
I choose the first method, to create a new struct rw_semaphore bg_delete_sem and
delete delete_unused_bgs_mutex :)

Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
(cherry picked from commit 7ec2d3f525eca457e1aca07650ea3a043d527194)
(cherry picked from commit b96b00666cf37eb0b419381c221d322181ad7542)

Conflicts:
fs/btrfs/extent-tree.c

fs/btrfs/extent-tree.c

index 77b2f9479a16c8284749bcb1fefe6a45b0e37c37..69b8b0651f2fd0a26e63b4843eb779b1c08df29c 100644 (file)
@@ -4068,19 +4068,32 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
        int need_commit = 2;
        int have_pinned_space;
        int have_bg_delete_sem = 0;
+       bool free_space_inode = btrfs_is_free_space_inode(inode);
 
        /* make sure bytes are sectorsize aligned */
        bytes = ALIGN(bytes, root->sectorsize);
 
-       if (btrfs_is_free_space_inode(inode)) {
+       if (free_space_inode) {
                need_commit = 0;
                ASSERT(current->journal_info);
        }
 
+       /*
+        * Here we shouldn't call down_read(bg_delete_sem) for free space inode,
+        * there is lock order between bg_delete_sem and "wait current trans
+        * finished". Meanwhile because we only do the data space reservation
+        * for free space cache in the transaction context,
+        * btrfs_delete_unused_bgs() will either have finished its job, or start
+        * a new transaction waiting current transaction to complete, there will
+        * be no unused block groups to be deleted, so it's safe to not call
+        * down_read(bg_delete_sem).
+        */
        data_sinfo = fs_info->data_sinfo;
        if (!data_sinfo) {
-               down_read(&root->fs_info->bg_delete_sem);
-               have_bg_delete_sem = 1;
+               if (!free_space_inode) {
+                       down_read(&root->fs_info->bg_delete_sem);
+                       have_bg_delete_sem = 1;
+               }
                goto alloc;
        }
 
@@ -4098,7 +4111,7 @@ again:
                 * We may need to allocate new chunk, so we should block
                 * btrfs_delete_unused_bgs()
                 */
-               if (!have_bg_delete_sem) {
+               if (!have_bg_delete_sem && !free_space_inode) {
                        spin_unlock(&data_sinfo->lock);
                        down_read(&root->fs_info->bg_delete_sem);
                        have_bg_delete_sem = 1;
@@ -4128,8 +4141,8 @@ alloc:
                         */
                        trans = btrfs_join_transaction(root);
                        if (IS_ERR(trans)) {
-                               up_read(&root->fs_info->bg_delete_sem);
-                               return PTR_ERR(trans);
+                               ret = PTR_ERR(trans);
+                               goto out;
                        }
 
                        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
@@ -4137,10 +4150,9 @@ alloc:
                                             CHUNK_ALLOC_NO_FORCE);
                        btrfs_end_transaction(trans, root);
                        if (ret < 0) {
-                               if (ret != -ENOSPC) {
-                                       up_read(&root->fs_info->bg_delete_sem);
-                                       return ret;
-                               } else {
+                               if (ret != -ENOSPC)
+                                       goto out;
+                               else {
                                        have_pinned_space = 1;
                                        goto commit_trans;
                                }
@@ -4175,18 +4187,16 @@ commit_trans:
 
                        trans = btrfs_join_transaction(root);
                        if (IS_ERR(trans)) {
-                               up_read(&root->fs_info->bg_delete_sem);
-                               return PTR_ERR(trans);
+                               ret = PTR_ERR(trans);
+                               goto out;
                        }
                        if (have_pinned_space >= 0 ||
                            test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
                                     &trans->transaction->flags) ||
                            need_commit > 0) {
                                ret = btrfs_commit_transaction(trans, root);
-                               if (ret) {
-                                       up_read(&root->fs_info->bg_delete_sem);
-                                       return ret;
-                               }
+                               if (ret)
+                                       goto out;
                                /*
                                 * The cleaner kthread might still be doing iput
                                 * operations. Wait for it to finish so that
@@ -4203,15 +4213,16 @@ commit_trans:
                trace_btrfs_space_reservation(root->fs_info,
                                              "space_info:enospc",
                                              data_sinfo->flags, bytes, 1);
-               up_read(&root->fs_info->bg_delete_sem);
-               return -ENOSPC;
+               ret = -ENOSPC;
+               goto out;
        }
        data_sinfo->bytes_may_use += bytes;
        trace_btrfs_space_reservation(root->fs_info, "space_info",
                                      data_sinfo->flags, bytes, 1);
        spin_unlock(&data_sinfo->lock);
 
-       if (have_bg_delete_sem)
+out:
+       if (have_bg_delete_sem && !free_space_inode)
                up_read(&root->fs_info->bg_delete_sem);
 
        return ret;