]> git.hungrycats.org Git - linux/commitdiff
Btrfs: add fragment=* debug mount option
authorJosef Bacik <jbacik@fb.com>
Wed, 23 Sep 2015 18:54:14 +0000 (14:54 -0400)
committerZygo Blaxell <zblaxell@thirteen.furryterror.org>
Thu, 22 Oct 2015 01:09:00 +0000 (21:09 -0400)
In tracking down these weird bitmap problems it was helpful to artificially
create an extremely fragmented file system.  These mount options let us either
fragment data or metadata or both.  With these options I could reproduce all
sorts of weird latencies and hangs that occur under extreme fragmentation and
get them fixed.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
(cherry picked from commit 6154ded9eb941f321f771f5fd848cd5c59e1418c)

Conflicts:
fs/btrfs/super.c

fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/free-space-cache.c
fs/btrfs/super.c
fs/btrfs/tests/free-space-tests.c

index 4f02af9c6af943c3e1778f17b31b4163f53c3e12..1be9f29dae931204e41cdecc341ea8b469315e1f 100644 (file)
@@ -2158,6 +2158,8 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
 #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR       (1 << 22)
 #define BTRFS_MOUNT_RESCAN_UUID_TREE   (1 << 23)
+#define BTRFS_MOUNT_FRAGMENT_DATA      (1 << 24)
+#define BTRFS_MOUNT_FRAGMENT_METADATA  (1 << 25)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL  (30)
 #define BTRFS_DEFAULT_MAX_INLINE       (8192)
@@ -2182,6 +2184,18 @@ struct btrfs_ioctl_defrag_range_args {
        btrfs_clear_opt(root->fs_info->mount_opt, opt);                 \
 }
 
+#ifdef CONFIG_BTRFS_DEBUG
+static inline int
+btrfs_should_fragment_free_space(struct btrfs_root *root,
+                                struct btrfs_block_group_cache *block_group)
+{
+       return (btrfs_test_opt(root, FRAGMENT_METADATA) &&
+               block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+              (btrfs_test_opt(root, FRAGMENT_DATA) &&
+               block_group->flags &  BTRFS_BLOCK_GROUP_DATA);
+}
+#endif
+
 /*
  * Requests for changes that need to be done during transaction commit.
  *
index fb11408e4aee10b6441cf8c1530ff4a7087aa15e..b7381e74a7a69599af1a78c3fee663d9a63b6386 100644 (file)
@@ -342,6 +342,27 @@ static void put_caching_control(struct btrfs_caching_control *ctl)
                kfree(ctl);
 }
 
+#ifdef CONFIG_BTRFS_DEBUG
+static void fragment_free_space(struct btrfs_root *root,
+                               struct btrfs_block_group_cache *block_group)
+{
+       u64 start = block_group->key.objectid;
+       u64 len = block_group->key.offset;
+       u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
+               root->nodesize : root->sectorsize;
+       u64 step = chunk << 1;
+
+       while (len > chunk) {
+               btrfs_remove_free_space(block_group, start, chunk);
+               start += step;
+               if (len < step)
+                       len = 0;
+               else
+                       len -= step;
+       }
+}
+#endif
+
 /*
  * this is only called by cache_block_group, since we could have freed extents
  * we need to check the pinned_extents for any extents that can't be used yet
@@ -398,6 +419,7 @@ static noinline void caching_thread(struct btrfs_work *work)
        u64 last = 0;
        u32 nritems;
        int ret = -ENOMEM;
+       bool wakeup = true;
 
        caching_ctl = container_of(work, struct btrfs_caching_control, work);
        block_group = caching_ctl->block_group;
@@ -410,6 +432,15 @@ static noinline void caching_thread(struct btrfs_work *work)
 
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
+#ifdef CONFIG_BTRFS_DEBUG
+       /*
+        * If we're fragmenting we don't want to make anybody think we can
+        * allocate from this block group until we've had a chance to fragment
+        * the free space.
+        */
+       if (btrfs_should_fragment_free_space(extent_root, block_group))
+               wakeup = false;
+#endif
        /*
         * We don't want to deadlock with somebody trying to allocate a new
         * extent for the extent root while also trying to search the extent
@@ -451,7 +482,8 @@ next:
 
                        if (need_resched() ||
                            rwsem_is_contended(&fs_info->commit_root_sem)) {
-                               caching_ctl->progress = last;
+                               if (wakeup)
+                                       caching_ctl->progress = last;
                                btrfs_release_path(path);
                                up_read(&fs_info->commit_root_sem);
                                mutex_unlock(&caching_ctl->mutex);
@@ -474,7 +506,8 @@ next:
                        key.offset = 0;
                        key.type = BTRFS_EXTENT_ITEM_KEY;
 
-                       caching_ctl->progress = last;
+                       if (wakeup)
+                               caching_ctl->progress = last;
                        btrfs_release_path(path);
                        goto next;
                }
@@ -501,7 +534,8 @@ next:
 
                        if (total_found > (1024 * 1024 * 2)) {
                                total_found = 0;
-                               wake_up(&caching_ctl->wait);
+                               if (wakeup)
+                                       wake_up(&caching_ctl->wait);
                        }
                }
                path->slots[0]++;
@@ -511,13 +545,27 @@ next:
        total_found += add_new_free_space(block_group, fs_info, last,
                                          block_group->key.objectid +
                                          block_group->key.offset);
-       caching_ctl->progress = (u64)-1;
-
        spin_lock(&block_group->lock);
        block_group->caching_ctl = NULL;
        block_group->cached = BTRFS_CACHE_FINISHED;
        spin_unlock(&block_group->lock);
 
+#ifdef CONFIG_BTRFS_DEBUG
+       if (btrfs_should_fragment_free_space(extent_root, block_group)) {
+               u64 bytes_used;
+
+               spin_lock(&block_group->space_info->lock);
+               spin_lock(&block_group->lock);
+               bytes_used = block_group->key.offset -
+                       btrfs_block_group_used(&block_group->item);
+               block_group->space_info->bytes_used += bytes_used >> 1;
+               spin_unlock(&block_group->lock);
+               spin_unlock(&block_group->space_info->lock);
+               fragment_free_space(extent_root, block_group);
+       }
+#endif
+
+       caching_ctl->progress = (u64)-1;
 err:
        btrfs_free_path(path);
        up_read(&fs_info->commit_root_sem);
@@ -617,6 +665,22 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
                        }
                }
                spin_unlock(&cache->lock);
+#ifdef CONFIG_BTRFS_DEBUG
+               if (ret == 1 &&
+                   btrfs_should_fragment_free_space(fs_info->extent_root,
+                                                    cache)) {
+                       u64 bytes_used;
+
+                       spin_lock(&cache->space_info->lock);
+                       spin_lock(&cache->lock);
+                       bytes_used = cache->key.offset -
+                               btrfs_block_group_used(&cache->item);
+                       cache->space_info->bytes_used += bytes_used >> 1;
+                       spin_unlock(&cache->lock);
+                       spin_unlock(&cache->space_info->lock);
+                       fragment_free_space(fs_info->extent_root, cache);
+               }
+#endif
                mutex_unlock(&caching_ctl->mutex);
 
                wake_up(&caching_ctl->wait);
@@ -9745,6 +9809,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
        free_excluded_extents(root, cache);
 
+#ifdef CONFIG_BTRFS_DEBUG
+       if (btrfs_should_fragment_free_space(root, cache)) {
+               u64 new_bytes_used = size - bytes_used;
+
+               bytes_used += new_bytes_used >> 1;
+               fragment_free_space(root, cache);
+       }
+#endif
        /*
         * Call to ensure the corresponding space_info object is created and
         * assigned to our block group, but don't update its counters just yet.
index fbdb34df381eb0dcd9a7c4158bd1ae1e83f4873b..57c53594ff4673a659928b26a426286eba501175 100644 (file)
@@ -1953,12 +1953,19 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
                      struct btrfs_free_space *info)
 {
        struct btrfs_block_group_cache *block_group = ctl->private;
+       bool forced = false;
+
+#ifdef CONFIG_BTRFS_DEBUG
+       if (btrfs_should_fragment_free_space(block_group->fs_info->extent_root,
+                                            block_group))
+               forced = true;
+#endif
 
        /*
         * If we are below the extents threshold then we can add this as an
         * extent, and don't have to deal with the bitmap
         */
-       if (ctl->free_extents < ctl->extents_thresh) {
+       if (!forced && ctl->free_extents < ctl->extents_thresh) {
                /*
                 * If this block group has some small extents we don't want to
                 * use up all of our free slots in the cache with them, we want
index 809221b0fdd86be4f959cf98df193c5a5ffaab72..8b2f459b614beec9e004c710e041cbe7f8752659 100644 (file)
@@ -325,6 +325,9 @@ enum {
        Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
        Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
        Opt_datasum, Opt_treelog, Opt_noinode_cache,
+#ifdef CONFIG_BTRFS_DEBUG
+       Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
+#endif
        Opt_err,
 };
 
@@ -377,6 +380,11 @@ static match_table_t tokens = {
        {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
        {Opt_fatal_errors, "fatal_errors=%s"},
        {Opt_commit_interval, "commit=%d"},
+#ifdef CONFIG_BTRFS_DEBUG
+       {Opt_fragment_data, "fragment=data"},
+       {Opt_fragment_metadata, "fragment=metadata"},
+       {Opt_fragment_all, "fragment=all"},
+#endif
        {Opt_err, NULL},
 };
 
@@ -743,6 +751,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
                        }
                        break;
+#ifdef CONFIG_BTRFS_DEBUG
+               case Opt_fragment_all:
+                       btrfs_info(root->fs_info, "fragmenting all space");
+                       btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
+                       btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
+                       break;
+               case Opt_fragment_metadata:
+                       btrfs_info(root->fs_info, "fragmenting metadata");
+                       btrfs_set_opt(info->mount_opt,
+                                     FRAGMENT_METADATA);
+                       break;
+               case Opt_fragment_data:
+                       btrfs_info(root->fs_info, "fragmenting data");
+                       btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
+                       break;
+#endif
                case Opt_err:
                        btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
                        ret = -EINVAL;
@@ -1108,6 +1132,16 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
                seq_puts(seq, ",fatal_errors=panic");
        if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
                seq_printf(seq, ",commit=%d", info->commit_interval);
+#ifdef CONFIG_BTRFS_DEBUG
+       if (btrfs_test_opt(root, FRAGMENT_DATA))
+               seq_puts(seq, ",fragment=data");
+       if (btrfs_test_opt(root, FRAGMENT_METADATA))
+               seq_puts(seq, ",fragment=metadata");
+#endif
+       seq_printf(seq, ",subvolid=%llu",
+                 BTRFS_I(d_inode(dentry))->root->root_key.objectid);
+       seq_puts(seq, ",subvol=");
+       seq_dentry(seq, dentry, " \t\n\\");
        return 0;
 }
 
index 2299bfde39eec666fe1b0876733746a76673f5a5..c8c3d70c31ffad4e02acd04e0f7dcaa54ad0fe2b 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include "btrfs-tests.h"
 #include "../ctree.h"
+#include "../disk-io.h"
 #include "../free-space-cache.h"
 
 #define BITS_PER_BITMAP                (PAGE_CACHE_SIZE * 8)
@@ -35,6 +36,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
                kfree(cache);
                return NULL;
        }
+       cache->fs_info = btrfs_alloc_dummy_fs_info();
+       if (!cache->fs_info) {
+               kfree(cache->free_space_ctl);
+               kfree(cache);
+               return NULL;
+       }
 
        cache->key.objectid = 0;
        cache->key.offset = 1024 * 1024 * 1024;
@@ -879,7 +886,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
 int btrfs_test_free_space_cache(void)
 {
        struct btrfs_block_group_cache *cache;
-       int ret;
+       struct btrfs_root *root = NULL;
+       int ret = -ENOMEM;
 
        test_msg("Running btrfs free space cache tests\n");
 
@@ -889,6 +897,17 @@ int btrfs_test_free_space_cache(void)
                return 0;
        }
 
+       root = btrfs_alloc_dummy_root();
+       if (!root)
+               goto out;
+
+       root->fs_info = btrfs_alloc_dummy_fs_info();
+       if (!root->fs_info)
+               goto out;
+
+       root->fs_info->extent_root = root;
+       cache->fs_info = root->fs_info;
+
        ret = test_extents(cache);
        if (ret)
                goto out;
@@ -904,6 +923,7 @@ out:
        __btrfs_remove_free_space_cache(cache->free_space_ctl);
        kfree(cache->free_space_ctl);
        kfree(cache);
+       btrfs_free_dummy_root(root);
        test_msg("Free space cache tests finished\n");
        return ret;
 }