]> git.hungrycats.org Git - linux/commitdiff
btrfs: improve global reserve stealing logic
author Josef Bacik <josef@toxicpanda.com>
Fri, 13 Mar 2020 19:58:05 +0000 (15:58 -0400)
committer Zygo Blaxell <ce3g8jdj@umail.furryterror.org>
Tue, 1 Sep 2020 04:40:58 +0000 (00:40 -0400)
[ Upstream commit 7f9fe614407692f670601a634621138233ac00d7 ]

For unlink transactions and block group removal
btrfs_start_transaction_fallback_global_rsv will first try to start an
ordinary transaction and if it fails it will fall back to reserving the
required amount by stealing from the global reserve. This is problematic
because of all the same reasons we had with previous iterations of the
ENOSPC handling, thundering herd.  We get a bunch of failures all at
once, everybody tries to allocate from the global reserve, some win and
some lose, we get an ENOSPC.

Fix this behavior by introducing BTRFS_RESERVE_FLUSH_ALL_STEAL. It's
used to mark unlink reservation. To fix this we need to integrate this
logic into the normal ENOSPC infrastructure.  We still go through all of
the normal flushing work, and at the moment we begin to fail all the
tickets we try to satisfy any tickets that are allowed to steal by
stealing from the global reserve.  If this works we start the flushing
system over again just like we would with a normal ticket satisfaction.
This serializes our global reserve stealing, so we don't have the
thundering herd problem.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Tested-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
(cherry picked from commit 1e42a1857bcff0820498d95c0803ad0e482b8f05)

fs/btrfs/block-group.c
fs/btrfs/ctree.h
fs/btrfs/inode.c
fs/btrfs/space-info.c
fs/btrfs/space-info.h
fs/btrfs/transaction.c
fs/btrfs/transaction.h

index 319e13fd84c344ded4716ee5224f096eb825860b..abf0a84b8a18a4e62f4770f4beed13edb146589a 100644 (file)
@@ -1168,7 +1168,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
        free_extent_map(em);
 
        return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
-                                                          num_items, 1);
+                                                          num_items);
 }
 
 /*
index 5884ba586f515b0690f897e5f12a3146a6660a33..2d47f976ab951d1f7c41a5d29b98ba04266efea7 100644 (file)
@@ -2471,6 +2471,7 @@ enum btrfs_reserve_flush_enum {
        BTRFS_RESERVE_FLUSH_LIMIT,
        BTRFS_RESERVE_FLUSH_EVICT,
        BTRFS_RESERVE_FLUSH_ALL,
+       BTRFS_RESERVE_FLUSH_ALL_STEAL,
 };
 
 enum btrfs_flush_state {
index 721effb468e80e0e327225266ba976a0e029ee65..e8f00c3a5ef5b6103e5e5b012bbbdfd06df0eb42 100644 (file)
@@ -4273,7 +4273,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
         * 1 for the inode ref
         * 1 for the inode
         */
-       return btrfs_start_transaction_fallback_global_rsv(root, 5, 5);
+       return btrfs_start_transaction_fallback_global_rsv(root, 5);
 }
 
 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
index f27ddbfb62abe413dd93d54236d5836bda39ce9a..93169e95af72fc3b067509e1b93633defff3f234 100644 (file)
@@ -687,6 +687,34 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
                !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
 }
 
+static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
+                                 struct btrfs_space_info *space_info,
+                                 struct reserve_ticket *ticket)
+{
+       struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+       u64 min_bytes;
+
+       if (global_rsv->space_info != space_info)
+               return false;
+
+       spin_lock(&global_rsv->lock);
+       min_bytes = div_factor(global_rsv->size, 5);
+       if (global_rsv->reserved < min_bytes + ticket->bytes) {
+               spin_unlock(&global_rsv->lock);
+               return false;
+       }
+       global_rsv->reserved -= ticket->bytes;
+       ticket->bytes = 0;
+       list_del_init(&ticket->list);
+       wake_up(&ticket->wait);
+       space_info->tickets_id++;
+       if (global_rsv->reserved < global_rsv->size)
+               global_rsv->full = 0;
+       spin_unlock(&global_rsv->lock);
+
+       return true;
+}
+
 /*
  * maybe_fail_all_tickets - we've exhausted our flushing, start failing tickets
  * @fs_info - fs_info for this fs
@@ -719,6 +747,10 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
                ticket = list_first_entry(&space_info->tickets,
                                          struct reserve_ticket, list);
 
+               if (ticket->steal &&
+                   steal_from_global_rsv(fs_info, space_info, ticket))
+                       return true;
+
                /*
                 * may_commit_transaction will avoid committing the transaction
                 * if it doesn't feel like the space reclaimed by the commit
@@ -938,6 +970,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
 
        switch (flush) {
        case BTRFS_RESERVE_FLUSH_ALL:
+       case BTRFS_RESERVE_FLUSH_ALL_STEAL:
                wait_reserve_ticket(fs_info, space_info, ticket);
                break;
        case BTRFS_RESERVE_FLUSH_LIMIT:
@@ -1037,7 +1070,9 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
                ticket.bytes = orig_bytes;
                ticket.error = 0;
                init_waitqueue_head(&ticket.wait);
-               if (flush == BTRFS_RESERVE_FLUSH_ALL) {
+               ticket.steal = (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
+               if (flush == BTRFS_RESERVE_FLUSH_ALL ||
+                   flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
                        list_add_tail(&ticket.list, &space_info->tickets);
                        if (!space_info->flush) {
                                space_info->flush = 1;
index 1a349e3f9cc120d3d3034b3fbe52e534d05a20f1..a7366e579922001174de4f5287d0e062838875a9 100644 (file)
@@ -71,6 +71,7 @@ struct btrfs_space_info {
 struct reserve_ticket {
        u64 bytes;
        int error;
+       bool steal;
        struct list_head list;
        wait_queue_head_t wait;
 };
index a209e2ef547f468cbe923ec46f519af22a6d69de..270c076149dc0999358ffd954583e12752221c65 100644 (file)
@@ -561,7 +561,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
                 * refill that amount for whatever is missing in the reserve.
                 */
                num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
-               if (delayed_refs_rsv->full == 0) {
+               if (flush == BTRFS_RESERVE_FLUSH_ALL &&
+                   delayed_refs_rsv->full == 0) {
                        delayed_refs_bytes = num_bytes;
                        num_bytes <<= 1;
                }
@@ -697,43 +698,10 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
 
 struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
                                        struct btrfs_root *root,
-                                       unsigned int num_items,
-                                       int min_factor)
+                                       unsigned int num_items)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
-       struct btrfs_trans_handle *trans;
-       u64 num_bytes;
-       int ret;
-
-       /*
-        * We have two callers: unlink and block group removal.  The
-        * former should succeed even if we will temporarily exceed
-        * quota and the latter operates on the extent root so
-        * qgroup enforcement is ignored anyway.
-        */
-       trans = start_transaction(root, num_items, TRANS_START,
-                                 BTRFS_RESERVE_FLUSH_ALL, false);
-       if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
-               return trans;
-
-       trans = btrfs_start_transaction(root, 0);
-       if (IS_ERR(trans))
-               return trans;
-
-       num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
-       ret = btrfs_cond_migrate_bytes(fs_info, &fs_info->trans_block_rsv,
-                                      num_bytes, min_factor);
-       if (ret) {
-               btrfs_end_transaction(trans);
-               return ERR_PTR(ret);
-       }
-
-       trans->block_rsv = &fs_info->trans_block_rsv;
-       trans->bytes_reserved = num_bytes;
-       trace_btrfs_space_reservation(fs_info, "transaction",
-                                     trans->transid, num_bytes, 1);
-
-       return trans;
+       return start_transaction(root, num_items, TRANS_START,
+                                BTRFS_RESERVE_FLUSH_ALL_STEAL, false);
 }
 
 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
index 49f7196368f5a5d3893a2a7d45c9d1ddcae38046..972c38e6b197455d22bfe69fa8046c97fdec590b 100644 (file)
@@ -180,8 +180,7 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
                                                   unsigned int num_items);
 struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
                                        struct btrfs_root *root,
-                                       unsigned int num_items,
-                                       int min_factor);
+                                       unsigned int num_items);
 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root);