]> git.hungrycats.org Git - linux/commitdiff
Btrfs: account for large extents with enospc
authorJosef Bacik <jbacik@fb.com>
Wed, 11 Feb 2015 20:08:59 +0000 (15:08 -0500)
committerZygo Blaxell <zblaxell@serenity.furryterror.org>
Fri, 27 Mar 2015 01:20:19 +0000 (21:20 -0400)
On our gluster boxes we stream large tar balls of backups onto our fses.  With
160gb of ram this means we get really large contiguous ranges of dirty data, but
the way our ENOSPC stuff works is that as long as it's contiguous we only hold
metadata reservation for one extent.  The problem is we limit our extents to
128mb, so we'll end up with at least 800 extents so our enospc accounting is
quite a bit lower than what we need.  To keep track of this make sure we
increase outstanding_extents for every multiple of the max extent size so we can
be sure to have enough reserved metadata space.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
(cherry picked from commit dcab6a3b2ae657a2017637083c28ee303b6b1b8e)

fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c

index f88e35591cb8470226ce34efb88ee0e521ce621d..73b45690c4a6d82f1c7471f20324b3d04bf6dce3 100644 (file)
@@ -198,6 +198,8 @@ static int btrfs_csum_sizes[] = { 4, 0 };
 
 #define BTRFS_DIRTY_METADATA_THRESH    (32 * 1024 * 1024)
 
+#define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024)
+
 /*
  * The key defines the order in the tree, and so it also defines (optimal)
  * block layout.
index 48fad009384103841f506376d2ff5f38c0b193b8..03d35efc3f9797a8a9abbadfa58c325bb48896ce 100644 (file)
@@ -5027,19 +5027,25 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
 /**
  * drop_outstanding_extent - drop an outstanding extent
  * @inode: the inode we're dropping the extent for
+ * @num_bytes: the number of bytes we're relaseing.
  *
  * This is called when we are freeing up an outstanding extent, either called
  * after an error or after an extent is written.  This will return the number of
  * reserved extents that need to be freed.  This must be called with
  * BTRFS_I(inode)->lock held.
  */
-static unsigned drop_outstanding_extent(struct inode *inode)
+static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
 {
        unsigned drop_inode_space = 0;
        unsigned dropped_extents = 0;
+       unsigned num_extents = 0;
 
-       BUG_ON(!BTRFS_I(inode)->outstanding_extents);
-       BTRFS_I(inode)->outstanding_extents--;
+       num_extents = (unsigned)div64_u64(num_bytes +
+                                         BTRFS_MAX_EXTENT_SIZE - 1,
+                                         BTRFS_MAX_EXTENT_SIZE);
+       ASSERT(num_extents);
+       ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
+       BTRFS_I(inode)->outstanding_extents -= num_extents;
 
        if (BTRFS_I(inode)->outstanding_extents == 0 &&
            test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
@@ -5210,7 +5216,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
 out_fail:
        spin_lock(&BTRFS_I(inode)->lock);
-       dropped = drop_outstanding_extent(inode);
+       dropped = drop_outstanding_extent(inode, num_bytes);
        /*
         * If the inodes csum_bytes is the same as the original
         * csum_bytes then we know we haven't raced with any free()ers
@@ -5289,7 +5295,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
        spin_lock(&BTRFS_I(inode)->lock);
-       dropped = drop_outstanding_extent(inode);
+       dropped = drop_outstanding_extent(inode, num_bytes);
 
        if (num_bytes)
                to_free = calc_csum_metadata_size(inode, num_bytes, 0);
index 5f7609ecf0b18cd2f9a50346971677ec77a365dd..5afef1e7a6164bd17dcbdc921b3572c9fb18ac02 100644 (file)
@@ -3214,7 +3214,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
                                               page,
                                               &delalloc_start,
                                               &delalloc_end,
-                                              128 * 1024 * 1024);
+                                              BTRFS_MAX_EXTENT_SIZE);
                if (nr_delalloc == 0) {
                        delalloc_start = delalloc_end + 1;
                        continue;
index 05c6ef553d84c4ec3dc02120ae1d485bda86f64e..97f351299c5a6128ecb18a491f0e735a92521f54 100644 (file)
@@ -1511,10 +1511,45 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 static void btrfs_split_extent_hook(struct inode *inode,
                                    struct extent_state *orig, u64 split)
 {
+       u64 size;
+
        /* not delalloc, ignore it */
        if (!(orig->state & EXTENT_DELALLOC))
                return;
 
+       size = orig->end - orig->start + 1;
+       if (size > BTRFS_MAX_EXTENT_SIZE) {
+               u64 num_extents;
+               u64 new_size;
+
+               /*
+                * We need the largest size of the remaining extent to see if we
+                * need to add a new outstanding extent.  Think of the following
+                * case
+                *
+                * [MEAX_EXTENT_SIZEx2 - 4k][4k]
+                *
+                * The new_size would just be 4k and we'd think we had enough
+                * outstanding extents for this if we only took one side of the
+                * split, same goes for the other direction.  We need to see if
+                * the larger size still is the same amount of extents as the
+                * original size, because if it is we need to add a new
+                * outstanding extent.  But if we split up and the larger size
+                * is less than the original then we are good to go since we've
+                * already accounted for the extra extent in our original
+                * accounting.
+                */
+               new_size = orig->end - split + 1;
+               if ((split - orig->start) > new_size)
+                       new_size = split - orig->start;
+
+               num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+                                       BTRFS_MAX_EXTENT_SIZE);
+               if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                             BTRFS_MAX_EXTENT_SIZE) < num_extents)
+                       return;
+       }
+
        spin_lock(&BTRFS_I(inode)->lock);
        BTRFS_I(inode)->outstanding_extents++;
        spin_unlock(&BTRFS_I(inode)->lock);
@@ -1530,10 +1565,34 @@ static void btrfs_merge_extent_hook(struct inode *inode,
                                    struct extent_state *new,
                                    struct extent_state *other)
 {
+       u64 new_size, old_size;
+       u64 num_extents;
+
        /* not delalloc, ignore it */
        if (!(other->state & EXTENT_DELALLOC))
                return;
 
+       old_size = other->end - other->start + 1;
+       new_size = old_size + (new->end - new->start + 1);
+
+       /* we're not bigger than the max, unreserve the space and go */
+       if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
+               spin_lock(&BTRFS_I(inode)->lock);
+               BTRFS_I(inode)->outstanding_extents--;
+               spin_unlock(&BTRFS_I(inode)->lock);
+               return;
+       }
+
+       /*
+        * If we grew by another max_extent, just return, we want to keep that
+        * reserved amount.
+        */
+       num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                               BTRFS_MAX_EXTENT_SIZE);
+       if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                     BTRFS_MAX_EXTENT_SIZE) > num_extents)
+               return;
+
        spin_lock(&BTRFS_I(inode)->lock);
        BTRFS_I(inode)->outstanding_extents--;
        spin_unlock(&BTRFS_I(inode)->lock);
@@ -1629,6 +1688,8 @@ static void btrfs_clear_bit_hook(struct inode *inode,
                                 unsigned long *bits)
 {
        u64 len = state->end + 1 - state->start;
+       u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1,
+                                   BTRFS_MAX_EXTENT_SIZE);
 
        spin_lock(&BTRFS_I(inode)->lock);
        if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
@@ -1648,7 +1709,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
                        *bits &= ~EXTENT_FIRST_DELALLOC;
                } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
                        spin_lock(&BTRFS_I(inode)->lock);
-                       BTRFS_I(inode)->outstanding_extents--;
+                       BTRFS_I(inode)->outstanding_extents -= num_extents;
                        spin_unlock(&BTRFS_I(inode)->lock);
                }