From: Zygo Blaxell Date: Tue, 15 Dec 2015 18:24:04 +0000 (-0500) Subject: Revert "Btrfs: fix file corruption and data loss after cloning inline extents" X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f6151a91c534a66e3ee9a7f6a1110349c01ffd39;p=linux Revert "Btrfs: fix file corruption and data loss after cloning inline extents" This reverts commit e69df47ad28484ad4289190a9abf096848072e39. --- diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index dae6c2e6ccaf..7098c0a2086c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3306,147 +3306,6 @@ static void clone_update_extent_map(struct inode *inode, &BTRFS_I(inode)->runtime_flags); } -/* - * Make sure we do not end up inserting an inline extent into a file that has - * already other (non-inline) extents. If a file has an inline extent it can - * not have any other extents and the (single) inline extent must start at the - * file offset 0. Failing to respect these rules will lead to file corruption, - * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc - * - * We can have extents that have been already written to disk or we can have - * dirty ranges still in delalloc, in which case the extent maps and items are - * created only when we run delalloc, and the delalloc ranges might fall outside - * the range we are currently locking in the inode's io tree. So we check the - * inode's i_size because of that (i_size updates are done while holding the - * i_mutex, which we are holding here). - * We also check to see if the inode has a size not greater than "datal" but has - * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are - * protected against such concurrent fallocate calls by the i_mutex). - * - * If the file has no extents but a size greater than datal, do not allow the - * copy because we would need turn the inline extent into a non-inline one (even - * with NO_HOLES enabled). If we find our destination inode only has one inline - * extent, just overwrite it with the source inline extent if its size is less - * than the source extent's size, or we could copy the source inline extent's - * data into the destination inode's inline extent if the later is greater then - * the former. - */ -static int clone_copy_inline_extent(struct inode *src, - struct inode *dst, - struct btrfs_trans_handle *trans, - struct btrfs_path *path, - struct btrfs_key *new_key, - const u64 drop_start, - const u64 datal, - const u64 skip, - const u64 size, - char *inline_data) -{ - struct btrfs_root *root = BTRFS_I(dst)->root; - const u64 aligned_end = ALIGN(new_key->offset + datal, - root->sectorsize); - int ret; - struct btrfs_key key; - - if (new_key->offset > 0) - return -EOPNOTSUPP; - - key.objectid = btrfs_ino(dst); - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = 0; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { - return ret; - } else if (ret > 0) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - return ret; - else if (ret > 0) - goto copy_inline_extent; - } - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - if (key.objectid == btrfs_ino(dst) && - key.type == BTRFS_EXTENT_DATA_KEY) { - ASSERT(key.offset > 0); - return -EOPNOTSUPP; - } - } else if (i_size_read(dst) <= datal) { - struct btrfs_file_extent_item *ei; - u64 ext_len; - - /* - * If the file size is <= datal, make sure there are no other - * extents following (can happen do to an fallocate call with - * the flag FALLOC_FL_KEEP_SIZE). - */ - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - /* - * If it's an inline extent, it can not have other extents - * following it. - */ - if (btrfs_file_extent_type(path->nodes[0], ei) == - BTRFS_FILE_EXTENT_INLINE) - goto copy_inline_extent; - - ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei); - if (ext_len > aligned_end) - return -EOPNOTSUPP; - - ret = btrfs_next_item(root, path); - if (ret < 0) { - return ret; - } else if (ret == 0) { - btrfs_item_key_to_cpu(path->nodes[0], &key, - path->slots[0]); - if (key.objectid == btrfs_ino(dst) && - key.type == BTRFS_EXTENT_DATA_KEY) - return -EOPNOTSUPP; - } - } - -copy_inline_extent: - /* - * We have no extent items, or we have an extent at offset 0 which may - * or may not be inlined. All these cases are dealt the same way. - */ - if (i_size_read(dst) > datal) { - /* If the destination inode has an inline extent... - * This would require copying the data from the source inline - * extent into the beginning of the destination's inline extent. - * But this is really complex, both extents can be compressed - * or just one of them, which would require decompressing and - * re-compressing data. - * So just don't support this case for now (it should be rare, - * we are not really saving space when cloning inline extents). - */ - return -EOPNOTSUPP; - } - - btrfs_release_path(path); - ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1); - if (ret) - return ret; - ret = btrfs_insert_empty_item(trans, root, path, new_key, size); - if (ret) - return ret; - - if (skip) { - const u32 start = btrfs_file_extent_calc_inline_size(0); - - memmove(inline_data + start, inline_data + start + skip, datal); - } - - write_extent_buffer(path->nodes[0], inline_data, - btrfs_item_ptr_offset(path->nodes[0], - path->slots[0]), - size); - inode_add_bytes(dst, datal); - - return 0; -} - /** * btrfs_clone() - clone a range from inode file to another * @@ -3713,6 +3572,21 @@ process_slot: } else if (type == BTRFS_FILE_EXTENT_INLINE) { u64 skip = 0; u64 trim = 0; + u64 aligned_end = 0; + + /* + * Don't copy an inline extent into an offset + * greater than zero. Having an inline extent + * at such an offset results in chaos as btrfs + * isn't prepared for such cases. Just skip + * this case for the same reasons as commented + * at btrfs_ioctl_clone(). + */ + if (last_dest_end > 0) { + ret = -EOPNOTSUPP; + btrfs_end_transaction(trans, root); + goto out; + } if (off > key.offset) { skip = off - key.offset; @@ -3730,22 +3604,42 @@ process_slot: size -= skip + trim; datal -= skip + trim; - ret = clone_copy_inline_extent(src, inode, - trans, path, - &new_key, - drop_start, - datal, - skip, size, buf); + aligned_end = ALIGN(new_key.offset + datal, + root->sectorsize); + ret = btrfs_drop_extents(trans, root, inode, + drop_start, + aligned_end, + 1); if (ret) { if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, - root, - ret); + root, ret); btrfs_end_transaction(trans, root); goto out; } + + ret = btrfs_insert_empty_item(trans, root, path, + &new_key, size); + if (ret) { + btrfs_abort_transaction(trans, root, + ret); + btrfs_end_transaction(trans, root); + goto out; + } + + if (skip) { + u32 start = + btrfs_file_extent_calc_inline_size(0); + memmove(buf+start, buf+start+skip, + datal); + } + leaf = path->nodes[0]; slot = path->slots[0]; + write_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, slot), + size); + inode_add_bytes(inode, datal); } /* If we have an implicit hole (NO_HOLES feature). */