blob: 651a26a6c6511700af514f5f05da34677e0b7dbe [file] [log] [blame]
Chris Mason6cbd5572007-06-12 09:07:21 -04001/*
Chris Masond352ac62008-09-29 15:18:18 -04002 * Copyright (C) 2007,2008 Oracle. All rights reserved.
Chris Mason6cbd5572007-06-12 09:07:21 -04003 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
Chris Masona6b6e752007-10-15 16:22:39 -040019#include <linux/sched.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090020#include <linux/slab.h>
Chris Masoneb60cea2007-02-02 09:18:22 -050021#include "ctree.h"
22#include "disk-io.h"
Chris Mason7f5c1512007-03-23 15:56:19 -040023#include "transaction.h"
Chris Mason5f39d392007-10-15 16:14:19 -040024#include "print-tree.h"
Chris Mason925baed2008-06-25 16:01:30 -040025#include "locking.h"
Chris Mason9a8dd152007-02-23 08:38:36 -050026
Chris Masone089f052007-03-16 16:20:31 -040027static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
28 *root, struct btrfs_path *path, int level);
29static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Masond4dbff92007-04-04 14:08:15 -040030 *root, struct btrfs_key *ins_key,
Chris Masoncc0c5532007-10-25 15:42:57 -040031 struct btrfs_path *path, int data_size, int extend);
Chris Mason5f39d392007-10-15 16:14:19 -040032static int push_node_left(struct btrfs_trans_handle *trans,
33 struct btrfs_root *root, struct extent_buffer *dst,
Chris Mason971a1f62008-04-24 10:54:32 -040034 struct extent_buffer *src, int empty);
Chris Mason5f39d392007-10-15 16:14:19 -040035static int balance_node_right(struct btrfs_trans_handle *trans,
36 struct btrfs_root *root,
37 struct extent_buffer *dst_buf,
38 struct extent_buffer *src_buf);
Jeff Mahoney143bede2012-03-01 14:56:26 +010039static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
Chris Masone089f052007-03-16 16:20:31 -040040 struct btrfs_path *path, int level, int slot);
Chris Masond97e63b2007-02-20 16:40:44 -050041
Chris Mason2c90e5d62007-04-02 10:50:19 -040042struct btrfs_path *btrfs_alloc_path(void)
43{
Chris Masondf24a2b2007-04-04 09:36:31 -040044 struct btrfs_path *path;
Jeff Mahoneye00f7302009-02-12 14:11:25 -050045 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
Chris Masondf24a2b2007-04-04 09:36:31 -040046 return path;
Chris Mason2c90e5d62007-04-02 10:50:19 -040047}
48
Chris Masonb4ce94d2009-02-04 09:25:08 -050049/*
50 * set all locked nodes in the path to blocking locks. This should
51 * be done before scheduling
52 */
53noinline void btrfs_set_path_blocking(struct btrfs_path *p)
54{
55 int i;
56 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
Chris Masonbd681512011-07-16 15:23:14 -040057 if (!p->nodes[i] || !p->locks[i])
58 continue;
59 btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
60 if (p->locks[i] == BTRFS_READ_LOCK)
61 p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
62 else if (p->locks[i] == BTRFS_WRITE_LOCK)
63 p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
Chris Masonb4ce94d2009-02-04 09:25:08 -050064 }
65}
66
67/*
68 * reset all the locked nodes in the patch to spinning locks.
Chris Mason4008c042009-02-12 14:09:45 -050069 *
70 * held is used to keep lockdep happy, when lockdep is enabled
71 * we set held to a blocking lock before we go around and
72 * retake all the spinlocks in the path. You can safely use NULL
73 * for held
Chris Masonb4ce94d2009-02-04 09:25:08 -050074 */
Chris Mason4008c042009-02-12 14:09:45 -050075noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
Chris Masonbd681512011-07-16 15:23:14 -040076 struct extent_buffer *held, int held_rw)
Chris Masonb4ce94d2009-02-04 09:25:08 -050077{
78 int i;
Chris Mason4008c042009-02-12 14:09:45 -050079
80#ifdef CONFIG_DEBUG_LOCK_ALLOC
81 /* lockdep really cares that we take all of these spinlocks
82 * in the right order. If any of the locks in the path are not
83 * currently blocking, it is going to complain. So, make really
84 * really sure by forcing the path to blocking before we clear
85 * the path blocking.
86 */
Chris Masonbd681512011-07-16 15:23:14 -040087 if (held) {
88 btrfs_set_lock_blocking_rw(held, held_rw);
89 if (held_rw == BTRFS_WRITE_LOCK)
90 held_rw = BTRFS_WRITE_LOCK_BLOCKING;
91 else if (held_rw == BTRFS_READ_LOCK)
92 held_rw = BTRFS_READ_LOCK_BLOCKING;
93 }
Chris Mason4008c042009-02-12 14:09:45 -050094 btrfs_set_path_blocking(p);
95#endif
96
97 for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
Chris Masonbd681512011-07-16 15:23:14 -040098 if (p->nodes[i] && p->locks[i]) {
99 btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
100 if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
101 p->locks[i] = BTRFS_WRITE_LOCK;
102 else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
103 p->locks[i] = BTRFS_READ_LOCK;
104 }
Chris Masonb4ce94d2009-02-04 09:25:08 -0500105 }
Chris Mason4008c042009-02-12 14:09:45 -0500106
107#ifdef CONFIG_DEBUG_LOCK_ALLOC
108 if (held)
Chris Masonbd681512011-07-16 15:23:14 -0400109 btrfs_clear_lock_blocking_rw(held, held_rw);
Chris Mason4008c042009-02-12 14:09:45 -0500110#endif
Chris Masonb4ce94d2009-02-04 09:25:08 -0500111}
112
Chris Masond352ac62008-09-29 15:18:18 -0400113/* this also releases the path */
Chris Mason2c90e5d62007-04-02 10:50:19 -0400114void btrfs_free_path(struct btrfs_path *p)
115{
Jesper Juhlff175d52010-12-25 21:22:30 +0000116 if (!p)
117 return;
David Sterbab3b4aa72011-04-21 01:20:15 +0200118 btrfs_release_path(p);
Chris Mason2c90e5d62007-04-02 10:50:19 -0400119 kmem_cache_free(btrfs_path_cachep, p);
120}
121
Chris Masond352ac62008-09-29 15:18:18 -0400122/*
123 * path release drops references on the extent buffers in the path
124 * and it drops any locks held by this path
125 *
126 * It is safe to call this on paths that no locks or extent buffers held.
127 */
David Sterbab3b4aa72011-04-21 01:20:15 +0200128noinline void btrfs_release_path(struct btrfs_path *p)
Chris Masoneb60cea2007-02-02 09:18:22 -0500129{
130 int i;
Chris Masona2135012008-06-25 16:01:30 -0400131
Chris Mason234b63a2007-03-13 10:46:10 -0400132 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
Chris Mason3f157a22008-06-25 16:01:31 -0400133 p->slots[i] = 0;
Chris Masoneb60cea2007-02-02 09:18:22 -0500134 if (!p->nodes[i])
Chris Mason925baed2008-06-25 16:01:30 -0400135 continue;
136 if (p->locks[i]) {
Chris Masonbd681512011-07-16 15:23:14 -0400137 btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -0400138 p->locks[i] = 0;
139 }
Chris Mason5f39d392007-10-15 16:14:19 -0400140 free_extent_buffer(p->nodes[i]);
Chris Mason3f157a22008-06-25 16:01:31 -0400141 p->nodes[i] = NULL;
Chris Masoneb60cea2007-02-02 09:18:22 -0500142 }
143}
144
Chris Masond352ac62008-09-29 15:18:18 -0400145/*
146 * safely gets a reference on the root node of a tree. A lock
147 * is not taken, so a concurrent writer may put a different node
148 * at the root of the tree. See btrfs_lock_root_node for the
149 * looping required.
150 *
151 * The extent buffer returned by this has a reference taken, so
152 * it won't disappear. It may stop being the root of the tree
153 * at any time because there are no locks held.
154 */
Chris Mason925baed2008-06-25 16:01:30 -0400155struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
156{
157 struct extent_buffer *eb;
Chris Mason240f62c2011-03-23 14:54:42 -0400158
159 rcu_read_lock();
160 eb = rcu_dereference(root->node);
Chris Mason925baed2008-06-25 16:01:30 -0400161 extent_buffer_get(eb);
Chris Mason240f62c2011-03-23 14:54:42 -0400162 rcu_read_unlock();
Chris Mason925baed2008-06-25 16:01:30 -0400163 return eb;
164}
165
Chris Masond352ac62008-09-29 15:18:18 -0400166/* loop around taking references on and locking the root node of the
167 * tree until you end up with a lock on the root. A locked buffer
168 * is returned, with a reference held.
169 */
Chris Mason925baed2008-06-25 16:01:30 -0400170struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
171{
172 struct extent_buffer *eb;
173
Chris Masond3977122009-01-05 21:25:51 -0500174 while (1) {
Chris Mason925baed2008-06-25 16:01:30 -0400175 eb = btrfs_root_node(root);
176 btrfs_tree_lock(eb);
Chris Mason240f62c2011-03-23 14:54:42 -0400177 if (eb == root->node)
Chris Mason925baed2008-06-25 16:01:30 -0400178 break;
Chris Mason925baed2008-06-25 16:01:30 -0400179 btrfs_tree_unlock(eb);
180 free_extent_buffer(eb);
181 }
182 return eb;
183}
184
Chris Masonbd681512011-07-16 15:23:14 -0400185/* loop around taking references on and locking the root node of the
186 * tree until you end up with a lock on the root. A locked buffer
187 * is returned, with a reference held.
188 */
189struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
190{
191 struct extent_buffer *eb;
192
193 while (1) {
194 eb = btrfs_root_node(root);
195 btrfs_tree_read_lock(eb);
196 if (eb == root->node)
197 break;
198 btrfs_tree_read_unlock(eb);
199 free_extent_buffer(eb);
200 }
201 return eb;
202}
203
Chris Masond352ac62008-09-29 15:18:18 -0400204/* cowonly root (everything not a reference counted cow subvolume), just get
205 * put onto a simple dirty list. transaction.c walks this to make sure they
206 * get properly updated on disk.
207 */
Chris Mason0b86a832008-03-24 15:01:56 -0400208static void add_root_to_dirty_list(struct btrfs_root *root)
209{
210 if (root->track_dirty && list_empty(&root->dirty_list)) {
211 list_add(&root->dirty_list,
212 &root->fs_info->dirty_cowonly_roots);
213 }
214}
215
Chris Masond352ac62008-09-29 15:18:18 -0400216/*
217 * used by snapshot creation to make a copy of a root for a tree with
218 * a given objectid. The buffer with the new root node is returned in
219 * cow_ret, and this func returns zero on success or a negative error code.
220 */
Chris Masonbe20aa92007-12-17 20:14:01 -0500221int btrfs_copy_root(struct btrfs_trans_handle *trans,
222 struct btrfs_root *root,
223 struct extent_buffer *buf,
224 struct extent_buffer **cow_ret, u64 new_root_objectid)
225{
226 struct extent_buffer *cow;
Chris Masonbe20aa92007-12-17 20:14:01 -0500227 int ret = 0;
228 int level;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400229 struct btrfs_disk_key disk_key;
Chris Masonbe20aa92007-12-17 20:14:01 -0500230
231 WARN_ON(root->ref_cows && trans->transid !=
232 root->fs_info->running_transaction->transid);
233 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
234
235 level = btrfs_header_level(buf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400236 if (level == 0)
237 btrfs_item_key(buf, &disk_key, 0);
238 else
239 btrfs_node_key(buf, &disk_key, 0);
Zheng Yan31840ae2008-09-23 13:14:14 -0400240
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400241 cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
242 new_root_objectid, &disk_key, level,
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200243 buf->start, 0, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400244 if (IS_ERR(cow))
Chris Masonbe20aa92007-12-17 20:14:01 -0500245 return PTR_ERR(cow);
246
247 copy_extent_buffer(cow, buf, 0, 0, cow->len);
248 btrfs_set_header_bytenr(cow, cow->start);
249 btrfs_set_header_generation(cow, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400250 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
251 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
252 BTRFS_HEADER_FLAG_RELOC);
253 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
254 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
255 else
256 btrfs_set_header_owner(cow, new_root_objectid);
Chris Masonbe20aa92007-12-17 20:14:01 -0500257
Yan Zheng2b820322008-11-17 21:11:30 -0500258 write_extent_buffer(cow, root->fs_info->fsid,
259 (unsigned long)btrfs_header_fsid(cow),
260 BTRFS_FSID_SIZE);
261
Chris Masonbe20aa92007-12-17 20:14:01 -0500262 WARN_ON(btrfs_header_generation(buf) > trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400263 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200264 ret = btrfs_inc_ref(trans, root, cow, 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400265 else
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200266 ret = btrfs_inc_ref(trans, root, cow, 0, 1);
Chris Mason4aec2b52007-12-18 16:25:45 -0500267
Chris Masonbe20aa92007-12-17 20:14:01 -0500268 if (ret)
269 return ret;
270
271 btrfs_mark_buffer_dirty(cow);
272 *cow_ret = cow;
273 return 0;
274}
275
Chris Masond352ac62008-09-29 15:18:18 -0400276/*
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400277 * check if the tree block can be shared by multiple trees
278 */
279int btrfs_block_can_be_shared(struct btrfs_root *root,
280 struct extent_buffer *buf)
281{
282 /*
283 * Tree blocks not in refernece counted trees and tree roots
284 * are never shared. If a block was allocated after the last
285 * snapshot and the block was not allocated by tree relocation,
286 * we know the block is not shared.
287 */
288 if (root->ref_cows &&
289 buf != root->node && buf != root->commit_root &&
290 (btrfs_header_generation(buf) <=
291 btrfs_root_last_snapshot(&root->root_item) ||
292 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
293 return 1;
294#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
295 if (root->ref_cows &&
296 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
297 return 1;
298#endif
299 return 0;
300}
301
302static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
303 struct btrfs_root *root,
304 struct extent_buffer *buf,
Yan, Zhengf0486c62010-05-16 10:46:25 -0400305 struct extent_buffer *cow,
306 int *last_ref)
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400307{
308 u64 refs;
309 u64 owner;
310 u64 flags;
311 u64 new_flags = 0;
312 int ret;
313
314 /*
315 * Backrefs update rules:
316 *
317 * Always use full backrefs for extent pointers in tree block
318 * allocated by tree relocation.
319 *
320 * If a shared tree block is no longer referenced by its owner
321 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
322 * use full backrefs for extent pointers in tree block.
323 *
324 * If a tree block is been relocating
325 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
326 * use full backrefs for extent pointers in tree block.
327 * The reason for this is some operations (such as drop tree)
328 * are only allowed for blocks use full backrefs.
329 */
330
331 if (btrfs_block_can_be_shared(root, buf)) {
332 ret = btrfs_lookup_extent_info(trans, root, buf->start,
333 buf->len, &refs, &flags);
Mark Fashehbe1a5562011-08-08 13:20:18 -0700334 if (ret)
335 return ret;
Mark Fashehe5df9572011-08-29 14:17:04 -0700336 if (refs == 0) {
337 ret = -EROFS;
338 btrfs_std_error(root->fs_info, ret);
339 return ret;
340 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400341 } else {
342 refs = 1;
343 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
344 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
345 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
346 else
347 flags = 0;
348 }
349
350 owner = btrfs_header_owner(buf);
351 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
352 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
353
354 if (refs > 1) {
355 if ((owner == root->root_key.objectid ||
356 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
357 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200358 ret = btrfs_inc_ref(trans, root, buf, 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400359 BUG_ON(ret);
360
361 if (root->root_key.objectid ==
362 BTRFS_TREE_RELOC_OBJECTID) {
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200363 ret = btrfs_dec_ref(trans, root, buf, 0, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400364 BUG_ON(ret);
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200365 ret = btrfs_inc_ref(trans, root, cow, 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400366 BUG_ON(ret);
367 }
368 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
369 } else {
370
371 if (root->root_key.objectid ==
372 BTRFS_TREE_RELOC_OBJECTID)
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200373 ret = btrfs_inc_ref(trans, root, cow, 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400374 else
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200375 ret = btrfs_inc_ref(trans, root, cow, 0, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400376 BUG_ON(ret);
377 }
378 if (new_flags != 0) {
379 ret = btrfs_set_disk_extent_flags(trans, root,
380 buf->start,
381 buf->len,
382 new_flags, 0);
Mark Fashehbe1a5562011-08-08 13:20:18 -0700383 if (ret)
384 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400385 }
386 } else {
387 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
388 if (root->root_key.objectid ==
389 BTRFS_TREE_RELOC_OBJECTID)
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200390 ret = btrfs_inc_ref(trans, root, cow, 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400391 else
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200392 ret = btrfs_inc_ref(trans, root, cow, 0, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400393 BUG_ON(ret);
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200394 ret = btrfs_dec_ref(trans, root, buf, 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400395 BUG_ON(ret);
396 }
397 clean_tree_block(trans, root, buf);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400398 *last_ref = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400399 }
400 return 0;
401}
402
403/*
Chris Masond3977122009-01-05 21:25:51 -0500404 * does the dirty work in cow of a single block. The parent block (if
405 * supplied) is updated to point to the new cow copy. The new buffer is marked
406 * dirty and returned locked. If you modify the block it needs to be marked
407 * dirty again.
Chris Masond352ac62008-09-29 15:18:18 -0400408 *
409 * search_start -- an allocation hint for the new block
410 *
Chris Masond3977122009-01-05 21:25:51 -0500411 * empty_size -- a hint that you plan on doing more cow. This is the size in
412 * bytes the allocator should try to find free next to the block it returns.
413 * This is just a hint and may be ignored by the allocator.
Chris Masond352ac62008-09-29 15:18:18 -0400414 */
Chris Masond3977122009-01-05 21:25:51 -0500415static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -0400416 struct btrfs_root *root,
417 struct extent_buffer *buf,
418 struct extent_buffer *parent, int parent_slot,
419 struct extent_buffer **cow_ret,
Chris Mason9fa8cfe2009-03-13 10:24:59 -0400420 u64 search_start, u64 empty_size)
Chris Mason6702ed42007-08-07 16:15:09 -0400421{
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400422 struct btrfs_disk_key disk_key;
Chris Mason5f39d392007-10-15 16:14:19 -0400423 struct extent_buffer *cow;
Mark Fashehbe1a5562011-08-08 13:20:18 -0700424 int level, ret;
Yan, Zhengf0486c62010-05-16 10:46:25 -0400425 int last_ref = 0;
Chris Mason925baed2008-06-25 16:01:30 -0400426 int unlock_orig = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400427 u64 parent_start;
Chris Mason6702ed42007-08-07 16:15:09 -0400428
Chris Mason925baed2008-06-25 16:01:30 -0400429 if (*cow_ret == buf)
430 unlock_orig = 1;
431
Chris Masonb9447ef82009-03-09 11:45:38 -0400432 btrfs_assert_tree_locked(buf);
Chris Mason925baed2008-06-25 16:01:30 -0400433
Chris Mason7bb86312007-12-11 09:25:06 -0500434 WARN_ON(root->ref_cows && trans->transid !=
435 root->fs_info->running_transaction->transid);
Chris Mason6702ed42007-08-07 16:15:09 -0400436 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
Chris Mason5f39d392007-10-15 16:14:19 -0400437
Chris Mason7bb86312007-12-11 09:25:06 -0500438 level = btrfs_header_level(buf);
Zheng Yan31840ae2008-09-23 13:14:14 -0400439
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400440 if (level == 0)
441 btrfs_item_key(buf, &disk_key, 0);
442 else
443 btrfs_node_key(buf, &disk_key, 0);
444
445 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
446 if (parent)
447 parent_start = parent->start;
448 else
449 parent_start = 0;
450 } else
451 parent_start = 0;
452
453 cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
454 root->root_key.objectid, &disk_key,
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200455 level, search_start, empty_size, 1);
Chris Mason6702ed42007-08-07 16:15:09 -0400456 if (IS_ERR(cow))
457 return PTR_ERR(cow);
458
Chris Masonb4ce94d2009-02-04 09:25:08 -0500459 /* cow is set to blocking by btrfs_init_new_buffer */
460
Chris Mason5f39d392007-10-15 16:14:19 -0400461 copy_extent_buffer(cow, buf, 0, 0, cow->len);
Chris Masondb945352007-10-15 16:15:53 -0400462 btrfs_set_header_bytenr(cow, cow->start);
Chris Mason5f39d392007-10-15 16:14:19 -0400463 btrfs_set_header_generation(cow, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400464 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
465 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
466 BTRFS_HEADER_FLAG_RELOC);
467 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
468 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
469 else
470 btrfs_set_header_owner(cow, root->root_key.objectid);
Chris Mason6702ed42007-08-07 16:15:09 -0400471
Yan Zheng2b820322008-11-17 21:11:30 -0500472 write_extent_buffer(cow, root->fs_info->fsid,
473 (unsigned long)btrfs_header_fsid(cow),
474 BTRFS_FSID_SIZE);
475
Mark Fashehbe1a5562011-08-08 13:20:18 -0700476 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
Mark Fashehb68dc2a2011-08-29 14:30:39 -0700477 if (ret) {
478 btrfs_std_error(root->fs_info, ret);
479 return ret;
480 }
Zheng Yan1a40e232008-09-26 10:09:34 -0400481
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400482 if (root->ref_cows)
483 btrfs_reloc_cow_block(trans, root, buf, cow);
484
Chris Mason6702ed42007-08-07 16:15:09 -0400485 if (buf == root->node) {
Chris Mason925baed2008-06-25 16:01:30 -0400486 WARN_ON(parent && parent != buf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400487 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
488 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
489 parent_start = buf->start;
490 else
491 parent_start = 0;
Chris Mason925baed2008-06-25 16:01:30 -0400492
Chris Mason5f39d392007-10-15 16:14:19 -0400493 extent_buffer_get(cow);
Chris Mason240f62c2011-03-23 14:54:42 -0400494 rcu_assign_pointer(root->node, cow);
Chris Mason925baed2008-06-25 16:01:30 -0400495
Yan, Zhengf0486c62010-05-16 10:46:25 -0400496 btrfs_free_tree_block(trans, root, buf, parent_start,
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200497 last_ref, 1);
Chris Mason5f39d392007-10-15 16:14:19 -0400498 free_extent_buffer(buf);
Chris Mason0b86a832008-03-24 15:01:56 -0400499 add_root_to_dirty_list(root);
Chris Mason6702ed42007-08-07 16:15:09 -0400500 } else {
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400501 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
502 parent_start = parent->start;
503 else
504 parent_start = 0;
505
506 WARN_ON(trans->transid != btrfs_header_generation(parent));
Chris Mason5f39d392007-10-15 16:14:19 -0400507 btrfs_set_node_blockptr(parent, parent_slot,
Chris Masondb945352007-10-15 16:15:53 -0400508 cow->start);
Chris Mason74493f72007-12-11 09:25:06 -0500509 btrfs_set_node_ptr_generation(parent, parent_slot,
510 trans->transid);
Chris Mason6702ed42007-08-07 16:15:09 -0400511 btrfs_mark_buffer_dirty(parent);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400512 btrfs_free_tree_block(trans, root, buf, parent_start,
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200513 last_ref, 1);
Chris Mason6702ed42007-08-07 16:15:09 -0400514 }
Chris Mason925baed2008-06-25 16:01:30 -0400515 if (unlock_orig)
516 btrfs_tree_unlock(buf);
Chris Mason5f39d392007-10-15 16:14:19 -0400517 free_extent_buffer(buf);
Chris Mason6702ed42007-08-07 16:15:09 -0400518 btrfs_mark_buffer_dirty(cow);
519 *cow_ret = cow;
520 return 0;
521}
522
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400523static inline int should_cow_block(struct btrfs_trans_handle *trans,
524 struct btrfs_root *root,
525 struct extent_buffer *buf)
526{
Liu Bof1ebcc72011-11-14 20:48:06 -0500527 /* ensure we can see the force_cow */
528 smp_rmb();
529
530 /*
531 * We do not need to cow a block if
532 * 1) this block is not created or changed in this transaction;
533 * 2) this block does not belong to TREE_RELOC tree;
534 * 3) the root is not forced COW.
535 *
536 * What is forced COW:
537 * when we create snapshot during commiting the transaction,
538 * after we've finished coping src root, we must COW the shared
539 * block to ensure the metadata consistency.
540 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400541 if (btrfs_header_generation(buf) == trans->transid &&
542 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
543 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
Liu Bof1ebcc72011-11-14 20:48:06 -0500544 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
545 !root->force_cow)
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400546 return 0;
547 return 1;
548}
549
Chris Masond352ac62008-09-29 15:18:18 -0400550/*
551 * cows a single block, see __btrfs_cow_block for the real work.
552 * This version of it has extra checks so that a block isn't cow'd more than
553 * once per transaction, as long as it hasn't been written yet
554 */
Chris Masond3977122009-01-05 21:25:51 -0500555noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -0400556 struct btrfs_root *root, struct extent_buffer *buf,
557 struct extent_buffer *parent, int parent_slot,
Chris Mason9fa8cfe2009-03-13 10:24:59 -0400558 struct extent_buffer **cow_ret)
Chris Mason02217ed2007-03-02 16:08:05 -0500559{
Chris Mason6702ed42007-08-07 16:15:09 -0400560 u64 search_start;
Chris Masonf510cfe2007-10-15 16:14:48 -0400561 int ret;
Chris Masondc17ff82008-01-08 15:46:30 -0500562
Chris Masonccd467d2007-06-28 15:57:36 -0400563 if (trans->transaction != root->fs_info->running_transaction) {
Chris Masond3977122009-01-05 21:25:51 -0500564 printk(KERN_CRIT "trans %llu running %llu\n",
565 (unsigned long long)trans->transid,
566 (unsigned long long)
Chris Masonccd467d2007-06-28 15:57:36 -0400567 root->fs_info->running_transaction->transid);
568 WARN_ON(1);
569 }
570 if (trans->transid != root->fs_info->generation) {
Chris Masond3977122009-01-05 21:25:51 -0500571 printk(KERN_CRIT "trans %llu running %llu\n",
572 (unsigned long long)trans->transid,
573 (unsigned long long)root->fs_info->generation);
Chris Masonccd467d2007-06-28 15:57:36 -0400574 WARN_ON(1);
575 }
Chris Masondc17ff82008-01-08 15:46:30 -0500576
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400577 if (!should_cow_block(trans, root, buf)) {
Chris Mason02217ed2007-03-02 16:08:05 -0500578 *cow_ret = buf;
579 return 0;
580 }
Chris Masonc4876852009-02-04 09:24:25 -0500581
Chris Mason0b86a832008-03-24 15:01:56 -0400582 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500583
584 if (parent)
585 btrfs_set_lock_blocking(parent);
586 btrfs_set_lock_blocking(buf);
587
Chris Masonf510cfe2007-10-15 16:14:48 -0400588 ret = __btrfs_cow_block(trans, root, buf, parent,
Chris Mason9fa8cfe2009-03-13 10:24:59 -0400589 parent_slot, cow_ret, search_start, 0);
liubo1abe9b82011-03-24 11:18:59 +0000590
591 trace_btrfs_cow_block(root, buf, *cow_ret);
592
Chris Masonf510cfe2007-10-15 16:14:48 -0400593 return ret;
Chris Mason6702ed42007-08-07 16:15:09 -0400594}
595
Chris Masond352ac62008-09-29 15:18:18 -0400596/*
597 * helper function for defrag to decide if two blocks pointed to by a
598 * node are actually close by
599 */
Chris Mason6b800532007-10-15 16:17:34 -0400600static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
Chris Mason6702ed42007-08-07 16:15:09 -0400601{
Chris Mason6b800532007-10-15 16:17:34 -0400602 if (blocknr < other && other - (blocknr + blocksize) < 32768)
Chris Mason6702ed42007-08-07 16:15:09 -0400603 return 1;
Chris Mason6b800532007-10-15 16:17:34 -0400604 if (blocknr > other && blocknr - (other + blocksize) < 32768)
Chris Mason6702ed42007-08-07 16:15:09 -0400605 return 1;
Chris Mason02217ed2007-03-02 16:08:05 -0500606 return 0;
607}
608
Chris Mason081e9572007-11-06 10:26:24 -0500609/*
610 * compare two keys in a memcmp fashion
611 */
612static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
613{
614 struct btrfs_key k1;
615
616 btrfs_disk_key_to_cpu(&k1, disk);
617
Diego Calleja20736ab2009-07-24 11:06:52 -0400618 return btrfs_comp_cpu_keys(&k1, k2);
Chris Mason081e9572007-11-06 10:26:24 -0500619}
620
Josef Bacikf3465ca2008-11-12 14:19:50 -0500621/*
622 * same as comp_keys only with two btrfs_key's
623 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400624int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
Josef Bacikf3465ca2008-11-12 14:19:50 -0500625{
626 if (k1->objectid > k2->objectid)
627 return 1;
628 if (k1->objectid < k2->objectid)
629 return -1;
630 if (k1->type > k2->type)
631 return 1;
632 if (k1->type < k2->type)
633 return -1;
634 if (k1->offset > k2->offset)
635 return 1;
636 if (k1->offset < k2->offset)
637 return -1;
638 return 0;
639}
Chris Mason081e9572007-11-06 10:26:24 -0500640
Chris Masond352ac62008-09-29 15:18:18 -0400641/*
642 * this is used by the defrag code to go through all the
643 * leaves pointed to by a node and reallocate them so that
644 * disk order is close to key order
645 */
Chris Mason6702ed42007-08-07 16:15:09 -0400646int btrfs_realloc_node(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -0400647 struct btrfs_root *root, struct extent_buffer *parent,
Chris Masona6b6e752007-10-15 16:22:39 -0400648 int start_slot, int cache_only, u64 *last_ret,
649 struct btrfs_key *progress)
Chris Mason6702ed42007-08-07 16:15:09 -0400650{
Chris Mason6b800532007-10-15 16:17:34 -0400651 struct extent_buffer *cur;
Chris Mason6702ed42007-08-07 16:15:09 -0400652 u64 blocknr;
Chris Masonca7a79a2008-05-12 12:59:19 -0400653 u64 gen;
Chris Masone9d0b132007-08-10 14:06:19 -0400654 u64 search_start = *last_ret;
655 u64 last_block = 0;
Chris Mason6702ed42007-08-07 16:15:09 -0400656 u64 other;
657 u32 parent_nritems;
Chris Mason6702ed42007-08-07 16:15:09 -0400658 int end_slot;
659 int i;
660 int err = 0;
Chris Masonf2183bd2007-08-10 14:42:37 -0400661 int parent_level;
Chris Mason6b800532007-10-15 16:17:34 -0400662 int uptodate;
663 u32 blocksize;
Chris Mason081e9572007-11-06 10:26:24 -0500664 int progress_passed = 0;
665 struct btrfs_disk_key disk_key;
Chris Mason6702ed42007-08-07 16:15:09 -0400666
Chris Mason5708b952007-10-25 15:43:18 -0400667 parent_level = btrfs_header_level(parent);
668 if (cache_only && parent_level != 1)
669 return 0;
670
Chris Masond3977122009-01-05 21:25:51 -0500671 if (trans->transaction != root->fs_info->running_transaction)
Chris Mason6702ed42007-08-07 16:15:09 -0400672 WARN_ON(1);
Chris Masond3977122009-01-05 21:25:51 -0500673 if (trans->transid != root->fs_info->generation)
Chris Mason6702ed42007-08-07 16:15:09 -0400674 WARN_ON(1);
Chris Mason86479a02007-09-10 19:58:16 -0400675
Chris Mason6b800532007-10-15 16:17:34 -0400676 parent_nritems = btrfs_header_nritems(parent);
Chris Mason6b800532007-10-15 16:17:34 -0400677 blocksize = btrfs_level_size(root, parent_level - 1);
Chris Mason6702ed42007-08-07 16:15:09 -0400678 end_slot = parent_nritems;
679
680 if (parent_nritems == 1)
681 return 0;
682
Chris Masonb4ce94d2009-02-04 09:25:08 -0500683 btrfs_set_lock_blocking(parent);
684
Chris Mason6702ed42007-08-07 16:15:09 -0400685 for (i = start_slot; i < end_slot; i++) {
686 int close = 1;
Chris Masona6b6e752007-10-15 16:22:39 -0400687
Chris Mason081e9572007-11-06 10:26:24 -0500688 btrfs_node_key(parent, &disk_key, i);
689 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
690 continue;
691
692 progress_passed = 1;
Chris Mason6b800532007-10-15 16:17:34 -0400693 blocknr = btrfs_node_blockptr(parent, i);
Chris Masonca7a79a2008-05-12 12:59:19 -0400694 gen = btrfs_node_ptr_generation(parent, i);
Chris Masone9d0b132007-08-10 14:06:19 -0400695 if (last_block == 0)
696 last_block = blocknr;
Chris Mason5708b952007-10-25 15:43:18 -0400697
Chris Mason6702ed42007-08-07 16:15:09 -0400698 if (i > 0) {
Chris Mason6b800532007-10-15 16:17:34 -0400699 other = btrfs_node_blockptr(parent, i - 1);
700 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -0400701 }
Chris Mason0ef3e662008-05-24 14:04:53 -0400702 if (!close && i < end_slot - 2) {
Chris Mason6b800532007-10-15 16:17:34 -0400703 other = btrfs_node_blockptr(parent, i + 1);
704 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -0400705 }
Chris Masone9d0b132007-08-10 14:06:19 -0400706 if (close) {
707 last_block = blocknr;
Chris Mason6702ed42007-08-07 16:15:09 -0400708 continue;
Chris Masone9d0b132007-08-10 14:06:19 -0400709 }
Chris Mason6702ed42007-08-07 16:15:09 -0400710
Chris Mason6b800532007-10-15 16:17:34 -0400711 cur = btrfs_find_tree_block(root, blocknr, blocksize);
712 if (cur)
Chris Mason1259ab72008-05-12 13:39:03 -0400713 uptodate = btrfs_buffer_uptodate(cur, gen);
Chris Mason6b800532007-10-15 16:17:34 -0400714 else
715 uptodate = 0;
Chris Mason5708b952007-10-25 15:43:18 -0400716 if (!cur || !uptodate) {
Chris Mason6702ed42007-08-07 16:15:09 -0400717 if (cache_only) {
Chris Mason6b800532007-10-15 16:17:34 -0400718 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -0400719 continue;
720 }
Chris Mason6b800532007-10-15 16:17:34 -0400721 if (!cur) {
722 cur = read_tree_block(root, blocknr,
Chris Masonca7a79a2008-05-12 12:59:19 -0400723 blocksize, gen);
Tsutomu Itoh97d9a8a2011-03-24 06:33:21 +0000724 if (!cur)
725 return -EIO;
Chris Mason6b800532007-10-15 16:17:34 -0400726 } else if (!uptodate) {
Chris Masonca7a79a2008-05-12 12:59:19 -0400727 btrfs_read_buffer(cur, gen);
Chris Masonf2183bd2007-08-10 14:42:37 -0400728 }
Chris Mason6702ed42007-08-07 16:15:09 -0400729 }
Chris Masone9d0b132007-08-10 14:06:19 -0400730 if (search_start == 0)
Chris Mason6b800532007-10-15 16:17:34 -0400731 search_start = last_block;
Chris Masone9d0b132007-08-10 14:06:19 -0400732
Chris Masone7a84562008-06-25 16:01:31 -0400733 btrfs_tree_lock(cur);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500734 btrfs_set_lock_blocking(cur);
Chris Mason6b800532007-10-15 16:17:34 -0400735 err = __btrfs_cow_block(trans, root, cur, parent, i,
Chris Masone7a84562008-06-25 16:01:31 -0400736 &cur, search_start,
Chris Mason6b800532007-10-15 16:17:34 -0400737 min(16 * blocksize,
Chris Mason9fa8cfe2009-03-13 10:24:59 -0400738 (end_slot - i) * blocksize));
Yan252c38f2007-08-29 09:11:44 -0400739 if (err) {
Chris Masone7a84562008-06-25 16:01:31 -0400740 btrfs_tree_unlock(cur);
Chris Mason6b800532007-10-15 16:17:34 -0400741 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -0400742 break;
Yan252c38f2007-08-29 09:11:44 -0400743 }
Chris Masone7a84562008-06-25 16:01:31 -0400744 search_start = cur->start;
745 last_block = cur->start;
Chris Masonf2183bd2007-08-10 14:42:37 -0400746 *last_ret = search_start;
Chris Masone7a84562008-06-25 16:01:31 -0400747 btrfs_tree_unlock(cur);
748 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -0400749 }
750 return err;
751}
752
Chris Mason74123bd2007-02-02 11:05:29 -0500753/*
754 * The leaf data grows from end-to-front in the node.
755 * this returns the address of the start of the last item,
756 * which is the stop of the leaf data stack
757 */
Chris Mason123abc82007-03-14 14:14:43 -0400758static inline unsigned int leaf_data_end(struct btrfs_root *root,
Chris Mason5f39d392007-10-15 16:14:19 -0400759 struct extent_buffer *leaf)
Chris Masonbe0e5c02007-01-26 15:51:26 -0500760{
Chris Mason5f39d392007-10-15 16:14:19 -0400761 u32 nr = btrfs_header_nritems(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -0500762 if (nr == 0)
Chris Mason123abc82007-03-14 14:14:43 -0400763 return BTRFS_LEAF_DATA_SIZE(root);
Chris Mason5f39d392007-10-15 16:14:19 -0400764 return btrfs_item_offset_nr(leaf, nr - 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -0500765}
766
Chris Masonaa5d6be2007-02-28 16:35:06 -0500767
Chris Mason74123bd2007-02-02 11:05:29 -0500768/*
Chris Mason5f39d392007-10-15 16:14:19 -0400769 * search for key in the extent_buffer. The items start at offset p,
770 * and they are item_size apart. There are 'max' items in p.
771 *
Chris Mason74123bd2007-02-02 11:05:29 -0500772 * the slot in the array is returned via slot, and it points to
773 * the place where you would insert key if it is not found in
774 * the array.
775 *
776 * slot may point to max if the key is bigger than all of the keys
777 */
Chris Masone02119d2008-09-05 16:13:11 -0400778static noinline int generic_bin_search(struct extent_buffer *eb,
779 unsigned long p,
780 int item_size, struct btrfs_key *key,
781 int max, int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -0500782{
783 int low = 0;
784 int high = max;
785 int mid;
786 int ret;
Chris Mason479965d2007-10-15 16:14:27 -0400787 struct btrfs_disk_key *tmp = NULL;
Chris Mason5f39d392007-10-15 16:14:19 -0400788 struct btrfs_disk_key unaligned;
789 unsigned long offset;
Chris Mason5f39d392007-10-15 16:14:19 -0400790 char *kaddr = NULL;
791 unsigned long map_start = 0;
792 unsigned long map_len = 0;
Chris Mason479965d2007-10-15 16:14:27 -0400793 int err;
Chris Masonbe0e5c02007-01-26 15:51:26 -0500794
Chris Masond3977122009-01-05 21:25:51 -0500795 while (low < high) {
Chris Masonbe0e5c02007-01-26 15:51:26 -0500796 mid = (low + high) / 2;
Chris Mason5f39d392007-10-15 16:14:19 -0400797 offset = p + mid * item_size;
798
Chris Masona6591712011-07-19 12:04:14 -0400799 if (!kaddr || offset < map_start ||
Chris Mason5f39d392007-10-15 16:14:19 -0400800 (offset + sizeof(struct btrfs_disk_key)) >
801 map_start + map_len) {
Chris Mason934d3752008-12-08 16:43:10 -0500802
803 err = map_private_extent_buffer(eb, offset,
Chris Mason479965d2007-10-15 16:14:27 -0400804 sizeof(struct btrfs_disk_key),
Chris Masona6591712011-07-19 12:04:14 -0400805 &kaddr, &map_start, &map_len);
Chris Mason5f39d392007-10-15 16:14:19 -0400806
Chris Mason479965d2007-10-15 16:14:27 -0400807 if (!err) {
808 tmp = (struct btrfs_disk_key *)(kaddr + offset -
809 map_start);
810 } else {
811 read_extent_buffer(eb, &unaligned,
812 offset, sizeof(unaligned));
813 tmp = &unaligned;
814 }
815
Chris Mason5f39d392007-10-15 16:14:19 -0400816 } else {
817 tmp = (struct btrfs_disk_key *)(kaddr + offset -
818 map_start);
819 }
Chris Masonbe0e5c02007-01-26 15:51:26 -0500820 ret = comp_keys(tmp, key);
821
822 if (ret < 0)
823 low = mid + 1;
824 else if (ret > 0)
825 high = mid;
826 else {
827 *slot = mid;
828 return 0;
829 }
830 }
831 *slot = low;
832 return 1;
833}
834
Chris Mason97571fd2007-02-24 13:39:08 -0500835/*
836 * simple bin_search frontend that does the right thing for
837 * leaves vs nodes
838 */
Chris Mason5f39d392007-10-15 16:14:19 -0400839static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
840 int level, int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -0500841{
Chris Mason5f39d392007-10-15 16:14:19 -0400842 if (level == 0) {
843 return generic_bin_search(eb,
844 offsetof(struct btrfs_leaf, items),
Chris Mason0783fcf2007-03-12 20:12:07 -0400845 sizeof(struct btrfs_item),
Chris Mason5f39d392007-10-15 16:14:19 -0400846 key, btrfs_header_nritems(eb),
Chris Mason7518a232007-03-12 12:01:18 -0400847 slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -0500848 } else {
Chris Mason5f39d392007-10-15 16:14:19 -0400849 return generic_bin_search(eb,
850 offsetof(struct btrfs_node, ptrs),
Chris Mason123abc82007-03-14 14:14:43 -0400851 sizeof(struct btrfs_key_ptr),
Chris Mason5f39d392007-10-15 16:14:19 -0400852 key, btrfs_header_nritems(eb),
Chris Mason7518a232007-03-12 12:01:18 -0400853 slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -0500854 }
855 return -1;
856}
857
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400858int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
859 int level, int *slot)
860{
861 return bin_search(eb, key, level, slot);
862}
863
Yan, Zhengf0486c62010-05-16 10:46:25 -0400864static void root_add_used(struct btrfs_root *root, u32 size)
865{
866 spin_lock(&root->accounting_lock);
867 btrfs_set_root_used(&root->root_item,
868 btrfs_root_used(&root->root_item) + size);
869 spin_unlock(&root->accounting_lock);
870}
871
872static void root_sub_used(struct btrfs_root *root, u32 size)
873{
874 spin_lock(&root->accounting_lock);
875 btrfs_set_root_used(&root->root_item,
876 btrfs_root_used(&root->root_item) - size);
877 spin_unlock(&root->accounting_lock);
878}
879
Chris Masond352ac62008-09-29 15:18:18 -0400880/* given a node and slot number, this reads the blocks it points to. The
881 * extent buffer is returned with a reference taken (but unlocked).
882 * NULL is returned on error.
883 */
Chris Masone02119d2008-09-05 16:13:11 -0400884static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
Chris Mason5f39d392007-10-15 16:14:19 -0400885 struct extent_buffer *parent, int slot)
Chris Masonbb803952007-03-01 12:04:21 -0500886{
Chris Masonca7a79a2008-05-12 12:59:19 -0400887 int level = btrfs_header_level(parent);
Chris Masonbb803952007-03-01 12:04:21 -0500888 if (slot < 0)
889 return NULL;
Chris Mason5f39d392007-10-15 16:14:19 -0400890 if (slot >= btrfs_header_nritems(parent))
Chris Masonbb803952007-03-01 12:04:21 -0500891 return NULL;
Chris Masonca7a79a2008-05-12 12:59:19 -0400892
893 BUG_ON(level == 0);
894
Chris Masondb945352007-10-15 16:15:53 -0400895 return read_tree_block(root, btrfs_node_blockptr(parent, slot),
Chris Masonca7a79a2008-05-12 12:59:19 -0400896 btrfs_level_size(root, level - 1),
897 btrfs_node_ptr_generation(parent, slot));
Chris Masonbb803952007-03-01 12:04:21 -0500898}
899
Chris Masond352ac62008-09-29 15:18:18 -0400900/*
901 * node level balancing, used to make sure nodes are in proper order for
902 * item deletion. We balance from the top down, so we have to make sure
903 * that a deletion won't leave an node completely empty later on.
904 */
Chris Masone02119d2008-09-05 16:13:11 -0400905static noinline int balance_level(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -0500906 struct btrfs_root *root,
907 struct btrfs_path *path, int level)
Chris Masonbb803952007-03-01 12:04:21 -0500908{
Chris Mason5f39d392007-10-15 16:14:19 -0400909 struct extent_buffer *right = NULL;
910 struct extent_buffer *mid;
911 struct extent_buffer *left = NULL;
912 struct extent_buffer *parent = NULL;
Chris Masonbb803952007-03-01 12:04:21 -0500913 int ret = 0;
914 int wret;
915 int pslot;
Chris Masonbb803952007-03-01 12:04:21 -0500916 int orig_slot = path->slots[level];
Chris Mason79f95c82007-03-01 15:16:26 -0500917 u64 orig_ptr;
Chris Masonbb803952007-03-01 12:04:21 -0500918
919 if (level == 0)
920 return 0;
921
Chris Mason5f39d392007-10-15 16:14:19 -0400922 mid = path->nodes[level];
Chris Masonb4ce94d2009-02-04 09:25:08 -0500923
Chris Masonbd681512011-07-16 15:23:14 -0400924 WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
925 path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
Chris Mason7bb86312007-12-11 09:25:06 -0500926 WARN_ON(btrfs_header_generation(mid) != trans->transid);
927
Chris Mason1d4f8a02007-03-13 09:28:32 -0400928 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
Chris Mason79f95c82007-03-01 15:16:26 -0500929
Li Zefana05a9bb2011-09-06 16:55:34 +0800930 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -0400931 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +0800932 pslot = path->slots[level + 1];
933 }
Chris Masonbb803952007-03-01 12:04:21 -0500934
Chris Mason40689472007-03-17 14:29:23 -0400935 /*
936 * deal with the case where there is only one pointer in the root
937 * by promoting the node below to a root
938 */
Chris Mason5f39d392007-10-15 16:14:19 -0400939 if (!parent) {
940 struct extent_buffer *child;
Chris Masonbb803952007-03-01 12:04:21 -0500941
Chris Mason5f39d392007-10-15 16:14:19 -0400942 if (btrfs_header_nritems(mid) != 1)
Chris Masonbb803952007-03-01 12:04:21 -0500943 return 0;
944
945 /* promote the child to a root */
Chris Mason5f39d392007-10-15 16:14:19 -0400946 child = read_node_slot(root, mid, 0);
Mark Fasheh305a26a2011-09-01 11:27:57 -0700947 if (!child) {
948 ret = -EROFS;
949 btrfs_std_error(root->fs_info, ret);
950 goto enospc;
951 }
952
Chris Mason925baed2008-06-25 16:01:30 -0400953 btrfs_tree_lock(child);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500954 btrfs_set_lock_blocking(child);
Chris Mason9fa8cfe2009-03-13 10:24:59 -0400955 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400956 if (ret) {
957 btrfs_tree_unlock(child);
958 free_extent_buffer(child);
959 goto enospc;
960 }
Yan2f375ab2008-02-01 14:58:07 -0500961
Chris Mason240f62c2011-03-23 14:54:42 -0400962 rcu_assign_pointer(root->node, child);
Chris Mason925baed2008-06-25 16:01:30 -0400963
Chris Mason0b86a832008-03-24 15:01:56 -0400964 add_root_to_dirty_list(root);
Chris Mason925baed2008-06-25 16:01:30 -0400965 btrfs_tree_unlock(child);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500966
Chris Mason925baed2008-06-25 16:01:30 -0400967 path->locks[level] = 0;
Chris Masonbb803952007-03-01 12:04:21 -0500968 path->nodes[level] = NULL;
Chris Mason5f39d392007-10-15 16:14:19 -0400969 clean_tree_block(trans, root, mid);
Chris Mason925baed2008-06-25 16:01:30 -0400970 btrfs_tree_unlock(mid);
Chris Masonbb803952007-03-01 12:04:21 -0500971 /* once for the path */
Chris Mason5f39d392007-10-15 16:14:19 -0400972 free_extent_buffer(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400973
974 root_sub_used(root, mid->len);
Arne Jansen66d7e7f2011-09-12 15:26:38 +0200975 btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
Chris Masonbb803952007-03-01 12:04:21 -0500976 /* once for the root ptr */
Chris Mason5f39d392007-10-15 16:14:19 -0400977 free_extent_buffer(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400978 return 0;
Chris Masonbb803952007-03-01 12:04:21 -0500979 }
Chris Mason5f39d392007-10-15 16:14:19 -0400980 if (btrfs_header_nritems(mid) >
Chris Mason123abc82007-03-14 14:14:43 -0400981 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
Chris Masonbb803952007-03-01 12:04:21 -0500982 return 0;
983
Andi Kleen559af822010-10-29 15:14:37 -0400984 btrfs_header_nritems(mid);
Chris Mason54aa1f42007-06-22 14:16:25 -0400985
Chris Mason5f39d392007-10-15 16:14:19 -0400986 left = read_node_slot(root, parent, pslot - 1);
987 if (left) {
Chris Mason925baed2008-06-25 16:01:30 -0400988 btrfs_tree_lock(left);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500989 btrfs_set_lock_blocking(left);
Chris Mason5f39d392007-10-15 16:14:19 -0400990 wret = btrfs_cow_block(trans, root, left,
Chris Mason9fa8cfe2009-03-13 10:24:59 -0400991 parent, pslot - 1, &left);
Chris Mason54aa1f42007-06-22 14:16:25 -0400992 if (wret) {
993 ret = wret;
994 goto enospc;
995 }
Chris Mason2cc58cf2007-08-27 16:49:44 -0400996 }
Chris Mason5f39d392007-10-15 16:14:19 -0400997 right = read_node_slot(root, parent, pslot + 1);
998 if (right) {
Chris Mason925baed2008-06-25 16:01:30 -0400999 btrfs_tree_lock(right);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001000 btrfs_set_lock_blocking(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001001 wret = btrfs_cow_block(trans, root, right,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001002 parent, pslot + 1, &right);
Chris Mason2cc58cf2007-08-27 16:49:44 -04001003 if (wret) {
1004 ret = wret;
1005 goto enospc;
1006 }
1007 }
1008
1009 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04001010 if (left) {
1011 orig_slot += btrfs_header_nritems(left);
Chris Masonbce4eae2008-04-24 14:42:46 -04001012 wret = push_node_left(trans, root, left, mid, 1);
Chris Mason79f95c82007-03-01 15:16:26 -05001013 if (wret < 0)
1014 ret = wret;
Andi Kleen559af822010-10-29 15:14:37 -04001015 btrfs_header_nritems(mid);
Chris Masonbb803952007-03-01 12:04:21 -05001016 }
Chris Mason79f95c82007-03-01 15:16:26 -05001017
1018 /*
1019 * then try to empty the right most buffer into the middle
1020 */
Chris Mason5f39d392007-10-15 16:14:19 -04001021 if (right) {
Chris Mason971a1f62008-04-24 10:54:32 -04001022 wret = push_node_left(trans, root, mid, right, 1);
Chris Mason54aa1f42007-06-22 14:16:25 -04001023 if (wret < 0 && wret != -ENOSPC)
Chris Mason79f95c82007-03-01 15:16:26 -05001024 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04001025 if (btrfs_header_nritems(right) == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04001026 clean_tree_block(trans, root, right);
Chris Mason925baed2008-06-25 16:01:30 -04001027 btrfs_tree_unlock(right);
Jeff Mahoney143bede2012-03-01 14:56:26 +01001028 del_ptr(trans, root, path, level + 1, pslot + 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001029 root_sub_used(root, right->len);
Arne Jansen66d7e7f2011-09-12 15:26:38 +02001030 btrfs_free_tree_block(trans, root, right, 0, 1, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001031 free_extent_buffer(right);
1032 right = NULL;
Chris Masonbb803952007-03-01 12:04:21 -05001033 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001034 struct btrfs_disk_key right_key;
1035 btrfs_node_key(right, &right_key, 0);
1036 btrfs_set_node_key(parent, &right_key, pslot + 1);
1037 btrfs_mark_buffer_dirty(parent);
Chris Masonbb803952007-03-01 12:04:21 -05001038 }
1039 }
Chris Mason5f39d392007-10-15 16:14:19 -04001040 if (btrfs_header_nritems(mid) == 1) {
Chris Mason79f95c82007-03-01 15:16:26 -05001041 /*
1042 * we're not allowed to leave a node with one item in the
1043 * tree during a delete. A deletion from lower in the tree
1044 * could try to delete the only pointer in this node.
1045 * So, pull some keys from the left.
1046 * There has to be a left pointer at this point because
1047 * otherwise we would have pulled some pointers from the
1048 * right
1049 */
Mark Fasheh305a26a2011-09-01 11:27:57 -07001050 if (!left) {
1051 ret = -EROFS;
1052 btrfs_std_error(root->fs_info, ret);
1053 goto enospc;
1054 }
Chris Mason5f39d392007-10-15 16:14:19 -04001055 wret = balance_node_right(trans, root, mid, left);
Chris Mason54aa1f42007-06-22 14:16:25 -04001056 if (wret < 0) {
Chris Mason79f95c82007-03-01 15:16:26 -05001057 ret = wret;
Chris Mason54aa1f42007-06-22 14:16:25 -04001058 goto enospc;
1059 }
Chris Masonbce4eae2008-04-24 14:42:46 -04001060 if (wret == 1) {
1061 wret = push_node_left(trans, root, left, mid, 1);
1062 if (wret < 0)
1063 ret = wret;
1064 }
Chris Mason79f95c82007-03-01 15:16:26 -05001065 BUG_ON(wret == 1);
1066 }
Chris Mason5f39d392007-10-15 16:14:19 -04001067 if (btrfs_header_nritems(mid) == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04001068 clean_tree_block(trans, root, mid);
Chris Mason925baed2008-06-25 16:01:30 -04001069 btrfs_tree_unlock(mid);
Jeff Mahoney143bede2012-03-01 14:56:26 +01001070 del_ptr(trans, root, path, level + 1, pslot);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001071 root_sub_used(root, mid->len);
Arne Jansen66d7e7f2011-09-12 15:26:38 +02001072 btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001073 free_extent_buffer(mid);
1074 mid = NULL;
Chris Mason79f95c82007-03-01 15:16:26 -05001075 } else {
1076 /* update the parent key to reflect our changes */
Chris Mason5f39d392007-10-15 16:14:19 -04001077 struct btrfs_disk_key mid_key;
1078 btrfs_node_key(mid, &mid_key, 0);
1079 btrfs_set_node_key(parent, &mid_key, pslot);
1080 btrfs_mark_buffer_dirty(parent);
Chris Mason79f95c82007-03-01 15:16:26 -05001081 }
Chris Masonbb803952007-03-01 12:04:21 -05001082
Chris Mason79f95c82007-03-01 15:16:26 -05001083 /* update the path */
Chris Mason5f39d392007-10-15 16:14:19 -04001084 if (left) {
1085 if (btrfs_header_nritems(left) > orig_slot) {
1086 extent_buffer_get(left);
Chris Mason925baed2008-06-25 16:01:30 -04001087 /* left was locked after cow */
Chris Mason5f39d392007-10-15 16:14:19 -04001088 path->nodes[level] = left;
Chris Masonbb803952007-03-01 12:04:21 -05001089 path->slots[level + 1] -= 1;
1090 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04001091 if (mid) {
1092 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04001093 free_extent_buffer(mid);
Chris Mason925baed2008-06-25 16:01:30 -04001094 }
Chris Masonbb803952007-03-01 12:04:21 -05001095 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001096 orig_slot -= btrfs_header_nritems(left);
Chris Masonbb803952007-03-01 12:04:21 -05001097 path->slots[level] = orig_slot;
1098 }
1099 }
Chris Mason79f95c82007-03-01 15:16:26 -05001100 /* double check we haven't messed things up */
Chris Masone20d96d2007-03-22 12:13:20 -04001101 if (orig_ptr !=
Chris Mason5f39d392007-10-15 16:14:19 -04001102 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
Chris Mason79f95c82007-03-01 15:16:26 -05001103 BUG();
Chris Mason54aa1f42007-06-22 14:16:25 -04001104enospc:
Chris Mason925baed2008-06-25 16:01:30 -04001105 if (right) {
1106 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001107 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04001108 }
1109 if (left) {
1110 if (path->nodes[level] != left)
1111 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001112 free_extent_buffer(left);
Chris Mason925baed2008-06-25 16:01:30 -04001113 }
Chris Masonbb803952007-03-01 12:04:21 -05001114 return ret;
1115}
1116
Chris Masond352ac62008-09-29 15:18:18 -04001117/* Node balancing for insertion. Here we only split or push nodes around
1118 * when they are completely full. This is also done top down, so we
1119 * have to be pessimistic.
1120 */
Chris Masond3977122009-01-05 21:25:51 -05001121static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05001122 struct btrfs_root *root,
1123 struct btrfs_path *path, int level)
Chris Masone66f7092007-04-20 13:16:02 -04001124{
Chris Mason5f39d392007-10-15 16:14:19 -04001125 struct extent_buffer *right = NULL;
1126 struct extent_buffer *mid;
1127 struct extent_buffer *left = NULL;
1128 struct extent_buffer *parent = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04001129 int ret = 0;
1130 int wret;
1131 int pslot;
1132 int orig_slot = path->slots[level];
Chris Masone66f7092007-04-20 13:16:02 -04001133
1134 if (level == 0)
1135 return 1;
1136
Chris Mason5f39d392007-10-15 16:14:19 -04001137 mid = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05001138 WARN_ON(btrfs_header_generation(mid) != trans->transid);
Chris Masone66f7092007-04-20 13:16:02 -04001139
Li Zefana05a9bb2011-09-06 16:55:34 +08001140 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04001141 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +08001142 pslot = path->slots[level + 1];
1143 }
Chris Masone66f7092007-04-20 13:16:02 -04001144
Chris Mason5f39d392007-10-15 16:14:19 -04001145 if (!parent)
Chris Masone66f7092007-04-20 13:16:02 -04001146 return 1;
Chris Masone66f7092007-04-20 13:16:02 -04001147
Chris Mason5f39d392007-10-15 16:14:19 -04001148 left = read_node_slot(root, parent, pslot - 1);
Chris Masone66f7092007-04-20 13:16:02 -04001149
1150 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04001151 if (left) {
Chris Masone66f7092007-04-20 13:16:02 -04001152 u32 left_nr;
Chris Mason925baed2008-06-25 16:01:30 -04001153
1154 btrfs_tree_lock(left);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001155 btrfs_set_lock_blocking(left);
1156
Chris Mason5f39d392007-10-15 16:14:19 -04001157 left_nr = btrfs_header_nritems(left);
Chris Mason33ade1f2007-04-20 13:48:57 -04001158 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1159 wret = 1;
1160 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001161 ret = btrfs_cow_block(trans, root, left, parent,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001162 pslot - 1, &left);
Chris Mason54aa1f42007-06-22 14:16:25 -04001163 if (ret)
1164 wret = 1;
1165 else {
Chris Mason54aa1f42007-06-22 14:16:25 -04001166 wret = push_node_left(trans, root,
Chris Mason971a1f62008-04-24 10:54:32 -04001167 left, mid, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04001168 }
Chris Mason33ade1f2007-04-20 13:48:57 -04001169 }
Chris Masone66f7092007-04-20 13:16:02 -04001170 if (wret < 0)
1171 ret = wret;
1172 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04001173 struct btrfs_disk_key disk_key;
Chris Masone66f7092007-04-20 13:16:02 -04001174 orig_slot += left_nr;
Chris Mason5f39d392007-10-15 16:14:19 -04001175 btrfs_node_key(mid, &disk_key, 0);
1176 btrfs_set_node_key(parent, &disk_key, pslot);
1177 btrfs_mark_buffer_dirty(parent);
1178 if (btrfs_header_nritems(left) > orig_slot) {
1179 path->nodes[level] = left;
Chris Masone66f7092007-04-20 13:16:02 -04001180 path->slots[level + 1] -= 1;
1181 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04001182 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04001183 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04001184 } else {
1185 orig_slot -=
Chris Mason5f39d392007-10-15 16:14:19 -04001186 btrfs_header_nritems(left);
Chris Masone66f7092007-04-20 13:16:02 -04001187 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04001188 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001189 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04001190 }
Chris Masone66f7092007-04-20 13:16:02 -04001191 return 0;
1192 }
Chris Mason925baed2008-06-25 16:01:30 -04001193 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001194 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04001195 }
Chris Mason925baed2008-06-25 16:01:30 -04001196 right = read_node_slot(root, parent, pslot + 1);
Chris Masone66f7092007-04-20 13:16:02 -04001197
1198 /*
1199 * then try to empty the right most buffer into the middle
1200 */
Chris Mason5f39d392007-10-15 16:14:19 -04001201 if (right) {
Chris Mason33ade1f2007-04-20 13:48:57 -04001202 u32 right_nr;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001203
Chris Mason925baed2008-06-25 16:01:30 -04001204 btrfs_tree_lock(right);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001205 btrfs_set_lock_blocking(right);
1206
Chris Mason5f39d392007-10-15 16:14:19 -04001207 right_nr = btrfs_header_nritems(right);
Chris Mason33ade1f2007-04-20 13:48:57 -04001208 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1209 wret = 1;
1210 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001211 ret = btrfs_cow_block(trans, root, right,
1212 parent, pslot + 1,
Chris Mason9fa8cfe2009-03-13 10:24:59 -04001213 &right);
Chris Mason54aa1f42007-06-22 14:16:25 -04001214 if (ret)
1215 wret = 1;
1216 else {
Chris Mason54aa1f42007-06-22 14:16:25 -04001217 wret = balance_node_right(trans, root,
Chris Mason5f39d392007-10-15 16:14:19 -04001218 right, mid);
Chris Mason54aa1f42007-06-22 14:16:25 -04001219 }
Chris Mason33ade1f2007-04-20 13:48:57 -04001220 }
Chris Masone66f7092007-04-20 13:16:02 -04001221 if (wret < 0)
1222 ret = wret;
1223 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04001224 struct btrfs_disk_key disk_key;
1225
1226 btrfs_node_key(right, &disk_key, 0);
1227 btrfs_set_node_key(parent, &disk_key, pslot + 1);
1228 btrfs_mark_buffer_dirty(parent);
1229
1230 if (btrfs_header_nritems(mid) <= orig_slot) {
1231 path->nodes[level] = right;
Chris Masone66f7092007-04-20 13:16:02 -04001232 path->slots[level + 1] += 1;
1233 path->slots[level] = orig_slot -
Chris Mason5f39d392007-10-15 16:14:19 -04001234 btrfs_header_nritems(mid);
Chris Mason925baed2008-06-25 16:01:30 -04001235 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04001236 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04001237 } else {
Chris Mason925baed2008-06-25 16:01:30 -04001238 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001239 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04001240 }
Chris Masone66f7092007-04-20 13:16:02 -04001241 return 0;
1242 }
Chris Mason925baed2008-06-25 16:01:30 -04001243 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001244 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04001245 }
Chris Masone66f7092007-04-20 13:16:02 -04001246 return 1;
1247}
1248
Chris Mason74123bd2007-02-02 11:05:29 -05001249/*
Chris Masond352ac62008-09-29 15:18:18 -04001250 * readahead one full node of leaves, finding things that are close
1251 * to the block in 'slot', and triggering ra on them.
Chris Mason3c69fae2007-08-07 15:52:22 -04001252 */
Chris Masonc8c42862009-04-03 10:14:18 -04001253static void reada_for_search(struct btrfs_root *root,
1254 struct btrfs_path *path,
1255 int level, int slot, u64 objectid)
Chris Mason3c69fae2007-08-07 15:52:22 -04001256{
Chris Mason5f39d392007-10-15 16:14:19 -04001257 struct extent_buffer *node;
Chris Mason01f46652007-12-21 16:24:26 -05001258 struct btrfs_disk_key disk_key;
Chris Mason3c69fae2007-08-07 15:52:22 -04001259 u32 nritems;
Chris Mason3c69fae2007-08-07 15:52:22 -04001260 u64 search;
Chris Masona7175312009-01-22 09:23:10 -05001261 u64 target;
Chris Mason6b800532007-10-15 16:17:34 -04001262 u64 nread = 0;
Josef Bacikcb25c2e2011-05-11 12:17:34 -04001263 u64 gen;
Chris Mason3c69fae2007-08-07 15:52:22 -04001264 int direction = path->reada;
Chris Mason5f39d392007-10-15 16:14:19 -04001265 struct extent_buffer *eb;
Chris Mason6b800532007-10-15 16:17:34 -04001266 u32 nr;
1267 u32 blocksize;
1268 u32 nscan = 0;
Chris Masondb945352007-10-15 16:15:53 -04001269
Chris Masona6b6e752007-10-15 16:22:39 -04001270 if (level != 1)
Chris Mason3c69fae2007-08-07 15:52:22 -04001271 return;
1272
Chris Mason6702ed42007-08-07 16:15:09 -04001273 if (!path->nodes[level])
1274 return;
1275
Chris Mason5f39d392007-10-15 16:14:19 -04001276 node = path->nodes[level];
Chris Mason925baed2008-06-25 16:01:30 -04001277
Chris Mason3c69fae2007-08-07 15:52:22 -04001278 search = btrfs_node_blockptr(node, slot);
Chris Mason6b800532007-10-15 16:17:34 -04001279 blocksize = btrfs_level_size(root, level - 1);
1280 eb = btrfs_find_tree_block(root, search, blocksize);
Chris Mason5f39d392007-10-15 16:14:19 -04001281 if (eb) {
1282 free_extent_buffer(eb);
Chris Mason3c69fae2007-08-07 15:52:22 -04001283 return;
1284 }
1285
Chris Masona7175312009-01-22 09:23:10 -05001286 target = search;
Chris Mason6b800532007-10-15 16:17:34 -04001287
Chris Mason5f39d392007-10-15 16:14:19 -04001288 nritems = btrfs_header_nritems(node);
Chris Mason6b800532007-10-15 16:17:34 -04001289 nr = slot;
Josef Bacik25b8b932011-06-08 14:36:54 -04001290
Chris Masond3977122009-01-05 21:25:51 -05001291 while (1) {
Chris Mason6b800532007-10-15 16:17:34 -04001292 if (direction < 0) {
1293 if (nr == 0)
1294 break;
1295 nr--;
1296 } else if (direction > 0) {
1297 nr++;
1298 if (nr >= nritems)
1299 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04001300 }
Chris Mason01f46652007-12-21 16:24:26 -05001301 if (path->reada < 0 && objectid) {
1302 btrfs_node_key(node, &disk_key, nr);
1303 if (btrfs_disk_key_objectid(&disk_key) != objectid)
1304 break;
1305 }
Chris Mason6b800532007-10-15 16:17:34 -04001306 search = btrfs_node_blockptr(node, nr);
Chris Masona7175312009-01-22 09:23:10 -05001307 if ((search <= target && target - search <= 65536) ||
1308 (search > target && search - target <= 65536)) {
Josef Bacikcb25c2e2011-05-11 12:17:34 -04001309 gen = btrfs_node_ptr_generation(node, nr);
Josef Bacikcb25c2e2011-05-11 12:17:34 -04001310 readahead_tree_block(root, search, blocksize, gen);
Chris Mason6b800532007-10-15 16:17:34 -04001311 nread += blocksize;
1312 }
1313 nscan++;
Chris Masona7175312009-01-22 09:23:10 -05001314 if ((nread > 65536 || nscan > 32))
Chris Mason6b800532007-10-15 16:17:34 -04001315 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04001316 }
1317}
Chris Mason925baed2008-06-25 16:01:30 -04001318
Chris Masond352ac62008-09-29 15:18:18 -04001319/*
Chris Masonb4ce94d2009-02-04 09:25:08 -05001320 * returns -EAGAIN if it had to drop the path, or zero if everything was in
1321 * cache
1322 */
1323static noinline int reada_for_balance(struct btrfs_root *root,
1324 struct btrfs_path *path, int level)
1325{
1326 int slot;
1327 int nritems;
1328 struct extent_buffer *parent;
1329 struct extent_buffer *eb;
1330 u64 gen;
1331 u64 block1 = 0;
1332 u64 block2 = 0;
1333 int ret = 0;
1334 int blocksize;
1335
Chris Mason8c594ea2009-04-20 15:50:10 -04001336 parent = path->nodes[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05001337 if (!parent)
1338 return 0;
1339
1340 nritems = btrfs_header_nritems(parent);
Chris Mason8c594ea2009-04-20 15:50:10 -04001341 slot = path->slots[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05001342 blocksize = btrfs_level_size(root, level);
1343
1344 if (slot > 0) {
1345 block1 = btrfs_node_blockptr(parent, slot - 1);
1346 gen = btrfs_node_ptr_generation(parent, slot - 1);
1347 eb = btrfs_find_tree_block(root, block1, blocksize);
1348 if (eb && btrfs_buffer_uptodate(eb, gen))
1349 block1 = 0;
1350 free_extent_buffer(eb);
1351 }
Chris Mason8c594ea2009-04-20 15:50:10 -04001352 if (slot + 1 < nritems) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05001353 block2 = btrfs_node_blockptr(parent, slot + 1);
1354 gen = btrfs_node_ptr_generation(parent, slot + 1);
1355 eb = btrfs_find_tree_block(root, block2, blocksize);
1356 if (eb && btrfs_buffer_uptodate(eb, gen))
1357 block2 = 0;
1358 free_extent_buffer(eb);
1359 }
1360 if (block1 || block2) {
1361 ret = -EAGAIN;
Chris Mason8c594ea2009-04-20 15:50:10 -04001362
1363 /* release the whole path */
David Sterbab3b4aa72011-04-21 01:20:15 +02001364 btrfs_release_path(path);
Chris Mason8c594ea2009-04-20 15:50:10 -04001365
1366 /* read the blocks */
Chris Masonb4ce94d2009-02-04 09:25:08 -05001367 if (block1)
1368 readahead_tree_block(root, block1, blocksize, 0);
1369 if (block2)
1370 readahead_tree_block(root, block2, blocksize, 0);
1371
1372 if (block1) {
1373 eb = read_tree_block(root, block1, blocksize, 0);
1374 free_extent_buffer(eb);
1375 }
Chris Mason8c594ea2009-04-20 15:50:10 -04001376 if (block2) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05001377 eb = read_tree_block(root, block2, blocksize, 0);
1378 free_extent_buffer(eb);
1379 }
1380 }
1381 return ret;
1382}
1383
1384
1385/*
Chris Masond3977122009-01-05 21:25:51 -05001386 * when we walk down the tree, it is usually safe to unlock the higher layers
1387 * in the tree. The exceptions are when our path goes through slot 0, because
1388 * operations on the tree might require changing key pointers higher up in the
1389 * tree.
Chris Masond352ac62008-09-29 15:18:18 -04001390 *
Chris Masond3977122009-01-05 21:25:51 -05001391 * callers might also have set path->keep_locks, which tells this code to keep
1392 * the lock if the path points to the last slot in the block. This is part of
1393 * walking through the tree, and selecting the next slot in the higher block.
Chris Masond352ac62008-09-29 15:18:18 -04001394 *
Chris Masond3977122009-01-05 21:25:51 -05001395 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so
1396 * if lowest_unlock is 1, level 0 won't be unlocked
Chris Masond352ac62008-09-29 15:18:18 -04001397 */
Chris Masone02119d2008-09-05 16:13:11 -04001398static noinline void unlock_up(struct btrfs_path *path, int level,
1399 int lowest_unlock)
Chris Mason925baed2008-06-25 16:01:30 -04001400{
1401 int i;
1402 int skip_level = level;
Chris Mason051e1b92008-06-25 16:01:30 -04001403 int no_skips = 0;
Chris Mason925baed2008-06-25 16:01:30 -04001404 struct extent_buffer *t;
1405
1406 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1407 if (!path->nodes[i])
1408 break;
1409 if (!path->locks[i])
1410 break;
Chris Mason051e1b92008-06-25 16:01:30 -04001411 if (!no_skips && path->slots[i] == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04001412 skip_level = i + 1;
1413 continue;
1414 }
Chris Mason051e1b92008-06-25 16:01:30 -04001415 if (!no_skips && path->keep_locks) {
Chris Mason925baed2008-06-25 16:01:30 -04001416 u32 nritems;
1417 t = path->nodes[i];
1418 nritems = btrfs_header_nritems(t);
Chris Mason051e1b92008-06-25 16:01:30 -04001419 if (nritems < 1 || path->slots[i] >= nritems - 1) {
Chris Mason925baed2008-06-25 16:01:30 -04001420 skip_level = i + 1;
1421 continue;
1422 }
1423 }
Chris Mason051e1b92008-06-25 16:01:30 -04001424 if (skip_level < i && i >= lowest_unlock)
1425 no_skips = 1;
1426
Chris Mason925baed2008-06-25 16:01:30 -04001427 t = path->nodes[i];
1428 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
Chris Masonbd681512011-07-16 15:23:14 -04001429 btrfs_tree_unlock_rw(t, path->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -04001430 path->locks[i] = 0;
1431 }
1432 }
1433}
1434
Chris Mason3c69fae2007-08-07 15:52:22 -04001435/*
Chris Masonb4ce94d2009-02-04 09:25:08 -05001436 * This releases any locks held in the path starting at level and
1437 * going all the way up to the root.
1438 *
1439 * btrfs_search_slot will keep the lock held on higher nodes in a few
1440 * corner cases, such as COW of the block at slot zero in the node. This
1441 * ignores those rules, and it should only be called when there are no
1442 * more updates to be done higher up in the tree.
1443 */
1444noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
1445{
1446 int i;
1447
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001448 if (path->keep_locks)
Chris Masonb4ce94d2009-02-04 09:25:08 -05001449 return;
1450
1451 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1452 if (!path->nodes[i])
Chris Mason12f4dac2009-02-04 09:31:42 -05001453 continue;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001454 if (!path->locks[i])
Chris Mason12f4dac2009-02-04 09:31:42 -05001455 continue;
Chris Masonbd681512011-07-16 15:23:14 -04001456 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001457 path->locks[i] = 0;
1458 }
1459}
1460
1461/*
Chris Masonc8c42862009-04-03 10:14:18 -04001462 * helper function for btrfs_search_slot. The goal is to find a block
1463 * in cache without setting the path to blocking. If we find the block
1464 * we return zero and the path is unchanged.
1465 *
1466 * If we can't find the block, we set the path blocking and do some
1467 * reada. -EAGAIN is returned and the search must be repeated.
1468 */
1469static int
1470read_block_for_search(struct btrfs_trans_handle *trans,
1471 struct btrfs_root *root, struct btrfs_path *p,
1472 struct extent_buffer **eb_ret, int level, int slot,
1473 struct btrfs_key *key)
1474{
1475 u64 blocknr;
1476 u64 gen;
1477 u32 blocksize;
1478 struct extent_buffer *b = *eb_ret;
1479 struct extent_buffer *tmp;
Chris Mason76a05b32009-05-14 13:24:30 -04001480 int ret;
Chris Masonc8c42862009-04-03 10:14:18 -04001481
1482 blocknr = btrfs_node_blockptr(b, slot);
1483 gen = btrfs_node_ptr_generation(b, slot);
1484 blocksize = btrfs_level_size(root, level - 1);
1485
1486 tmp = btrfs_find_tree_block(root, blocknr, blocksize);
Chris Masoncb449212010-10-24 11:01:27 -04001487 if (tmp) {
1488 if (btrfs_buffer_uptodate(tmp, 0)) {
1489 if (btrfs_buffer_uptodate(tmp, gen)) {
1490 /*
1491 * we found an up to date block without
1492 * sleeping, return
1493 * right away
1494 */
1495 *eb_ret = tmp;
1496 return 0;
1497 }
1498 /* the pages were up to date, but we failed
1499 * the generation number check. Do a full
1500 * read for the generation number that is correct.
1501 * We must do this without dropping locks so
1502 * we can trust our generation number
1503 */
1504 free_extent_buffer(tmp);
Chris Masonbd681512011-07-16 15:23:14 -04001505 btrfs_set_path_blocking(p);
1506
Chris Masoncb449212010-10-24 11:01:27 -04001507 tmp = read_tree_block(root, blocknr, blocksize, gen);
1508 if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1509 *eb_ret = tmp;
1510 return 0;
1511 }
1512 free_extent_buffer(tmp);
David Sterbab3b4aa72011-04-21 01:20:15 +02001513 btrfs_release_path(p);
Chris Masoncb449212010-10-24 11:01:27 -04001514 return -EIO;
1515 }
Chris Masonc8c42862009-04-03 10:14:18 -04001516 }
1517
1518 /*
1519 * reduce lock contention at high levels
1520 * of the btree by dropping locks before
Chris Mason76a05b32009-05-14 13:24:30 -04001521 * we read. Don't release the lock on the current
1522 * level because we need to walk this node to figure
1523 * out which blocks to read.
Chris Masonc8c42862009-04-03 10:14:18 -04001524 */
Chris Mason8c594ea2009-04-20 15:50:10 -04001525 btrfs_unlock_up_safe(p, level + 1);
1526 btrfs_set_path_blocking(p);
1527
Chris Masoncb449212010-10-24 11:01:27 -04001528 free_extent_buffer(tmp);
Chris Masonc8c42862009-04-03 10:14:18 -04001529 if (p->reada)
1530 reada_for_search(root, p, level, slot, key->objectid);
1531
David Sterbab3b4aa72011-04-21 01:20:15 +02001532 btrfs_release_path(p);
Chris Mason76a05b32009-05-14 13:24:30 -04001533
1534 ret = -EAGAIN;
Yan, Zheng5bdd3532010-05-26 11:20:30 -04001535 tmp = read_tree_block(root, blocknr, blocksize, 0);
Chris Mason76a05b32009-05-14 13:24:30 -04001536 if (tmp) {
1537 /*
1538 * If the read above didn't mark this buffer up to date,
1539 * it will never end up being up to date. Set ret to EIO now
1540 * and give up so that our caller doesn't loop forever
1541 * on our EAGAINs.
1542 */
1543 if (!btrfs_buffer_uptodate(tmp, 0))
1544 ret = -EIO;
Chris Masonc8c42862009-04-03 10:14:18 -04001545 free_extent_buffer(tmp);
Chris Mason76a05b32009-05-14 13:24:30 -04001546 }
1547 return ret;
Chris Masonc8c42862009-04-03 10:14:18 -04001548}
1549
1550/*
1551 * helper function for btrfs_search_slot. This does all of the checks
1552 * for node-level blocks and does any balancing required based on
1553 * the ins_len.
1554 *
1555 * If no extra work was required, zero is returned. If we had to
1556 * drop the path, -EAGAIN is returned and btrfs_search_slot must
1557 * start over
1558 */
1559static int
1560setup_nodes_for_search(struct btrfs_trans_handle *trans,
1561 struct btrfs_root *root, struct btrfs_path *p,
Chris Masonbd681512011-07-16 15:23:14 -04001562 struct extent_buffer *b, int level, int ins_len,
1563 int *write_lock_level)
Chris Masonc8c42862009-04-03 10:14:18 -04001564{
1565 int ret;
1566 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
1567 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1568 int sret;
1569
Chris Masonbd681512011-07-16 15:23:14 -04001570 if (*write_lock_level < level + 1) {
1571 *write_lock_level = level + 1;
1572 btrfs_release_path(p);
1573 goto again;
1574 }
1575
Chris Masonc8c42862009-04-03 10:14:18 -04001576 sret = reada_for_balance(root, p, level);
1577 if (sret)
1578 goto again;
1579
1580 btrfs_set_path_blocking(p);
1581 sret = split_node(trans, root, p, level);
Chris Masonbd681512011-07-16 15:23:14 -04001582 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonc8c42862009-04-03 10:14:18 -04001583
1584 BUG_ON(sret > 0);
1585 if (sret) {
1586 ret = sret;
1587 goto done;
1588 }
1589 b = p->nodes[level];
1590 } else if (ins_len < 0 && btrfs_header_nritems(b) <
Chris Masoncfbb9302009-05-18 10:41:58 -04001591 BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
Chris Masonc8c42862009-04-03 10:14:18 -04001592 int sret;
1593
Chris Masonbd681512011-07-16 15:23:14 -04001594 if (*write_lock_level < level + 1) {
1595 *write_lock_level = level + 1;
1596 btrfs_release_path(p);
1597 goto again;
1598 }
1599
Chris Masonc8c42862009-04-03 10:14:18 -04001600 sret = reada_for_balance(root, p, level);
1601 if (sret)
1602 goto again;
1603
1604 btrfs_set_path_blocking(p);
1605 sret = balance_level(trans, root, p, level);
Chris Masonbd681512011-07-16 15:23:14 -04001606 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonc8c42862009-04-03 10:14:18 -04001607
1608 if (sret) {
1609 ret = sret;
1610 goto done;
1611 }
1612 b = p->nodes[level];
1613 if (!b) {
David Sterbab3b4aa72011-04-21 01:20:15 +02001614 btrfs_release_path(p);
Chris Masonc8c42862009-04-03 10:14:18 -04001615 goto again;
1616 }
1617 BUG_ON(btrfs_header_nritems(b) == 1);
1618 }
1619 return 0;
1620
1621again:
1622 ret = -EAGAIN;
1623done:
1624 return ret;
1625}
1626
1627/*
Chris Mason74123bd2007-02-02 11:05:29 -05001628 * look for key in the tree. path is filled in with nodes along the way
1629 * if key is found, we return zero and you can find the item in the leaf
1630 * level of the path (level 0)
1631 *
1632 * If the key isn't found, the path points to the slot where it should
Chris Masonaa5d6be2007-02-28 16:35:06 -05001633 * be inserted, and 1 is returned. If there are other errors during the
1634 * search a negative error number is returned.
Chris Mason97571fd2007-02-24 13:39:08 -05001635 *
1636 * if ins_len > 0, nodes and leaves will be split as we walk down the
1637 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
1638 * possible)
Chris Mason74123bd2007-02-02 11:05:29 -05001639 */
Chris Masone089f052007-03-16 16:20:31 -04001640int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1641 *root, struct btrfs_key *key, struct btrfs_path *p, int
1642 ins_len, int cow)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001643{
Chris Mason5f39d392007-10-15 16:14:19 -04001644 struct extent_buffer *b;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001645 int slot;
1646 int ret;
Yan Zheng33c66f42009-07-22 09:59:00 -04001647 int err;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001648 int level;
Chris Mason925baed2008-06-25 16:01:30 -04001649 int lowest_unlock = 1;
Chris Masonbd681512011-07-16 15:23:14 -04001650 int root_lock;
1651 /* everything at write_lock_level or lower must be write locked */
1652 int write_lock_level = 0;
Chris Mason9f3a7422007-08-07 15:52:19 -04001653 u8 lowest_level = 0;
1654
Chris Mason6702ed42007-08-07 16:15:09 -04001655 lowest_level = p->lowest_level;
Chris Mason323ac952008-10-01 19:05:46 -04001656 WARN_ON(lowest_level && ins_len > 0);
Chris Mason22b0ebd2007-03-30 08:47:31 -04001657 WARN_ON(p->nodes[0] != NULL);
Josef Bacik251792012008-10-29 14:49:05 -04001658
Chris Masonbd681512011-07-16 15:23:14 -04001659 if (ins_len < 0) {
Chris Mason925baed2008-06-25 16:01:30 -04001660 lowest_unlock = 2;
Chris Mason65b51a02008-08-01 15:11:20 -04001661
Chris Masonbd681512011-07-16 15:23:14 -04001662 /* when we are removing items, we might have to go up to level
1663 * two as we update tree pointers Make sure we keep write
1664 * for those levels as well
1665 */
1666 write_lock_level = 2;
1667 } else if (ins_len > 0) {
1668 /*
1669 * for inserting items, make sure we have a write lock on
1670 * level 1 so we can update keys
1671 */
1672 write_lock_level = 1;
1673 }
1674
1675 if (!cow)
1676 write_lock_level = -1;
1677
1678 if (cow && (p->keep_locks || p->lowest_level))
1679 write_lock_level = BTRFS_MAX_LEVEL;
1680
Chris Masonbb803952007-03-01 12:04:21 -05001681again:
Chris Masonbd681512011-07-16 15:23:14 -04001682 /*
1683 * we try very hard to do read locks on the root
1684 */
1685 root_lock = BTRFS_READ_LOCK;
1686 level = 0;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001687 if (p->search_commit_root) {
Chris Masonbd681512011-07-16 15:23:14 -04001688 /*
1689 * the commit roots are read only
1690 * so we always do read locks
1691 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001692 b = root->commit_root;
1693 extent_buffer_get(b);
Chris Masonbd681512011-07-16 15:23:14 -04001694 level = btrfs_header_level(b);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001695 if (!p->skip_locking)
Chris Masonbd681512011-07-16 15:23:14 -04001696 btrfs_tree_read_lock(b);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001697 } else {
Chris Masonbd681512011-07-16 15:23:14 -04001698 if (p->skip_locking) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001699 b = btrfs_root_node(root);
Chris Masonbd681512011-07-16 15:23:14 -04001700 level = btrfs_header_level(b);
1701 } else {
1702 /* we don't know the level of the root node
1703 * until we actually have it read locked
1704 */
1705 b = btrfs_read_lock_root_node(root);
1706 level = btrfs_header_level(b);
1707 if (level <= write_lock_level) {
1708 /* whoops, must trade for write lock */
1709 btrfs_tree_read_unlock(b);
1710 free_extent_buffer(b);
1711 b = btrfs_lock_root_node(root);
1712 root_lock = BTRFS_WRITE_LOCK;
1713
1714 /* the level might have changed, check again */
1715 level = btrfs_header_level(b);
1716 }
1717 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001718 }
Chris Masonbd681512011-07-16 15:23:14 -04001719 p->nodes[level] = b;
1720 if (!p->skip_locking)
1721 p->locks[level] = root_lock;
Chris Mason925baed2008-06-25 16:01:30 -04001722
Chris Masoneb60cea2007-02-02 09:18:22 -05001723 while (b) {
Chris Mason5f39d392007-10-15 16:14:19 -04001724 level = btrfs_header_level(b);
Chris Mason65b51a02008-08-01 15:11:20 -04001725
1726 /*
1727 * setup the path here so we can release it under lock
1728 * contention with the cow code
1729 */
Chris Mason02217ed2007-03-02 16:08:05 -05001730 if (cow) {
Chris Masonc8c42862009-04-03 10:14:18 -04001731 /*
1732 * if we don't really need to cow this block
1733 * then we don't want to set the path blocking,
1734 * so we test it here
1735 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001736 if (!should_cow_block(trans, root, b))
Chris Mason65b51a02008-08-01 15:11:20 -04001737 goto cow_done;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001738
Chris Masonb4ce94d2009-02-04 09:25:08 -05001739 btrfs_set_path_blocking(p);
1740
Chris Masonbd681512011-07-16 15:23:14 -04001741 /*
1742 * must have write locks on this node and the
1743 * parent
1744 */
1745 if (level + 1 > write_lock_level) {
1746 write_lock_level = level + 1;
1747 btrfs_release_path(p);
1748 goto again;
1749 }
1750
Yan Zheng33c66f42009-07-22 09:59:00 -04001751 err = btrfs_cow_block(trans, root, b,
1752 p->nodes[level + 1],
1753 p->slots[level + 1], &b);
1754 if (err) {
Yan Zheng33c66f42009-07-22 09:59:00 -04001755 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04001756 goto done;
Chris Mason54aa1f42007-06-22 14:16:25 -04001757 }
Chris Mason02217ed2007-03-02 16:08:05 -05001758 }
Chris Mason65b51a02008-08-01 15:11:20 -04001759cow_done:
Chris Mason02217ed2007-03-02 16:08:05 -05001760 BUG_ON(!cow && ins_len);
Chris Mason65b51a02008-08-01 15:11:20 -04001761
Chris Masoneb60cea2007-02-02 09:18:22 -05001762 p->nodes[level] = b;
Chris Masonbd681512011-07-16 15:23:14 -04001763 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001764
1765 /*
1766 * we have a lock on b and as long as we aren't changing
1767 * the tree, there is no way to for the items in b to change.
1768 * It is safe to drop the lock on our parent before we
1769 * go through the expensive btree search on b.
1770 *
1771 * If cow is true, then we might be changing slot zero,
1772 * which may require changing the parent. So, we can't
1773 * drop the lock until after we know which slot we're
1774 * operating on.
1775 */
1776 if (!cow)
1777 btrfs_unlock_up_safe(p, level + 1);
1778
Chris Mason5f39d392007-10-15 16:14:19 -04001779 ret = bin_search(b, key, level, &slot);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001780
Chris Mason5f39d392007-10-15 16:14:19 -04001781 if (level != 0) {
Yan Zheng33c66f42009-07-22 09:59:00 -04001782 int dec = 0;
1783 if (ret && slot > 0) {
1784 dec = 1;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001785 slot -= 1;
Yan Zheng33c66f42009-07-22 09:59:00 -04001786 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05001787 p->slots[level] = slot;
Yan Zheng33c66f42009-07-22 09:59:00 -04001788 err = setup_nodes_for_search(trans, root, p, b, level,
Chris Masonbd681512011-07-16 15:23:14 -04001789 ins_len, &write_lock_level);
Yan Zheng33c66f42009-07-22 09:59:00 -04001790 if (err == -EAGAIN)
Chris Masonc8c42862009-04-03 10:14:18 -04001791 goto again;
Yan Zheng33c66f42009-07-22 09:59:00 -04001792 if (err) {
1793 ret = err;
Chris Masonc8c42862009-04-03 10:14:18 -04001794 goto done;
Yan Zheng33c66f42009-07-22 09:59:00 -04001795 }
Chris Masonc8c42862009-04-03 10:14:18 -04001796 b = p->nodes[level];
1797 slot = p->slots[level];
Chris Masonb4ce94d2009-02-04 09:25:08 -05001798
Chris Masonbd681512011-07-16 15:23:14 -04001799 /*
1800 * slot 0 is special, if we change the key
1801 * we have to update the parent pointer
1802 * which means we must have a write lock
1803 * on the parent
1804 */
1805 if (slot == 0 && cow &&
1806 write_lock_level < level + 1) {
1807 write_lock_level = level + 1;
1808 btrfs_release_path(p);
1809 goto again;
1810 }
1811
Chris Masonf9efa9c2008-06-25 16:14:04 -04001812 unlock_up(p, level, lowest_unlock);
1813
Chris Mason925baed2008-06-25 16:01:30 -04001814 if (level == lowest_level) {
Yan Zheng33c66f42009-07-22 09:59:00 -04001815 if (dec)
1816 p->slots[level]++;
Zheng Yan5b21f2e2008-09-26 10:05:38 -04001817 goto done;
Chris Mason925baed2008-06-25 16:01:30 -04001818 }
Chris Masonca7a79a2008-05-12 12:59:19 -04001819
Yan Zheng33c66f42009-07-22 09:59:00 -04001820 err = read_block_for_search(trans, root, p,
Chris Masonc8c42862009-04-03 10:14:18 -04001821 &b, level, slot, key);
Yan Zheng33c66f42009-07-22 09:59:00 -04001822 if (err == -EAGAIN)
Chris Masonc8c42862009-04-03 10:14:18 -04001823 goto again;
Yan Zheng33c66f42009-07-22 09:59:00 -04001824 if (err) {
1825 ret = err;
Chris Mason76a05b32009-05-14 13:24:30 -04001826 goto done;
Yan Zheng33c66f42009-07-22 09:59:00 -04001827 }
Chris Mason76a05b32009-05-14 13:24:30 -04001828
Chris Masonb4ce94d2009-02-04 09:25:08 -05001829 if (!p->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04001830 level = btrfs_header_level(b);
1831 if (level <= write_lock_level) {
1832 err = btrfs_try_tree_write_lock(b);
1833 if (!err) {
1834 btrfs_set_path_blocking(p);
1835 btrfs_tree_lock(b);
1836 btrfs_clear_path_blocking(p, b,
1837 BTRFS_WRITE_LOCK);
1838 }
1839 p->locks[level] = BTRFS_WRITE_LOCK;
1840 } else {
1841 err = btrfs_try_tree_read_lock(b);
1842 if (!err) {
1843 btrfs_set_path_blocking(p);
1844 btrfs_tree_read_lock(b);
1845 btrfs_clear_path_blocking(p, b,
1846 BTRFS_READ_LOCK);
1847 }
1848 p->locks[level] = BTRFS_READ_LOCK;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001849 }
Chris Masonbd681512011-07-16 15:23:14 -04001850 p->nodes[level] = b;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001851 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05001852 } else {
1853 p->slots[level] = slot;
Yan Zheng87b29b22008-12-17 10:21:48 -05001854 if (ins_len > 0 &&
1855 btrfs_leaf_free_space(root, b) < ins_len) {
Chris Masonbd681512011-07-16 15:23:14 -04001856 if (write_lock_level < 1) {
1857 write_lock_level = 1;
1858 btrfs_release_path(p);
1859 goto again;
1860 }
1861
Chris Masonb4ce94d2009-02-04 09:25:08 -05001862 btrfs_set_path_blocking(p);
Yan Zheng33c66f42009-07-22 09:59:00 -04001863 err = split_leaf(trans, root, key,
1864 p, ins_len, ret == 0);
Chris Masonbd681512011-07-16 15:23:14 -04001865 btrfs_clear_path_blocking(p, NULL, 0);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001866
Yan Zheng33c66f42009-07-22 09:59:00 -04001867 BUG_ON(err > 0);
1868 if (err) {
1869 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04001870 goto done;
1871 }
Chris Mason5c680ed2007-02-22 11:39:13 -05001872 }
Chris Mason459931e2008-12-10 09:10:46 -05001873 if (!p->search_for_split)
1874 unlock_up(p, level, lowest_unlock);
Chris Mason65b51a02008-08-01 15:11:20 -04001875 goto done;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001876 }
1877 }
Chris Mason65b51a02008-08-01 15:11:20 -04001878 ret = 1;
1879done:
Chris Masonb4ce94d2009-02-04 09:25:08 -05001880 /*
1881 * we don't really know what they plan on doing with the path
1882 * from here on, so for now just mark it as blocking
1883 */
Chris Masonb9473432009-03-13 11:00:37 -04001884 if (!p->leave_spinning)
1885 btrfs_set_path_blocking(p);
Chris Mason76a05b32009-05-14 13:24:30 -04001886 if (ret < 0)
David Sterbab3b4aa72011-04-21 01:20:15 +02001887 btrfs_release_path(p);
Chris Mason65b51a02008-08-01 15:11:20 -04001888 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001889}
1890
Chris Mason74123bd2007-02-02 11:05:29 -05001891/*
1892 * adjust the pointers going up the tree, starting at level
1893 * making sure the right key of each node is points to 'key'.
1894 * This is used after shifting pointers to the left, so it stops
1895 * fixing up pointers when a given leaf/node is not in slot 0 of the
1896 * higher levels
Chris Masonaa5d6be2007-02-28 16:35:06 -05001897 *
Chris Mason74123bd2007-02-02 11:05:29 -05001898 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01001899static void fixup_low_keys(struct btrfs_trans_handle *trans,
1900 struct btrfs_root *root, struct btrfs_path *path,
1901 struct btrfs_disk_key *key, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001902{
1903 int i;
Chris Mason5f39d392007-10-15 16:14:19 -04001904 struct extent_buffer *t;
1905
Chris Mason234b63a2007-03-13 10:46:10 -04001906 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05001907 int tslot = path->slots[i];
Chris Masoneb60cea2007-02-02 09:18:22 -05001908 if (!path->nodes[i])
Chris Masonbe0e5c02007-01-26 15:51:26 -05001909 break;
Chris Mason5f39d392007-10-15 16:14:19 -04001910 t = path->nodes[i];
1911 btrfs_set_node_key(t, key, tslot);
Chris Masond6025572007-03-30 14:27:56 -04001912 btrfs_mark_buffer_dirty(path->nodes[i]);
Chris Masonbe0e5c02007-01-26 15:51:26 -05001913 if (tslot != 0)
1914 break;
1915 }
1916}
1917
Chris Mason74123bd2007-02-02 11:05:29 -05001918/*
Zheng Yan31840ae2008-09-23 13:14:14 -04001919 * update item key.
1920 *
1921 * This function isn't completely safe. It's the caller's responsibility
1922 * that the new key won't break the order
1923 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01001924void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1925 struct btrfs_root *root, struct btrfs_path *path,
1926 struct btrfs_key *new_key)
Zheng Yan31840ae2008-09-23 13:14:14 -04001927{
1928 struct btrfs_disk_key disk_key;
1929 struct extent_buffer *eb;
1930 int slot;
1931
1932 eb = path->nodes[0];
1933 slot = path->slots[0];
1934 if (slot > 0) {
1935 btrfs_item_key(eb, &disk_key, slot - 1);
Jeff Mahoney143bede2012-03-01 14:56:26 +01001936 BUG_ON(comp_keys(&disk_key, new_key) >= 0);
Zheng Yan31840ae2008-09-23 13:14:14 -04001937 }
1938 if (slot < btrfs_header_nritems(eb) - 1) {
1939 btrfs_item_key(eb, &disk_key, slot + 1);
Jeff Mahoney143bede2012-03-01 14:56:26 +01001940 BUG_ON(comp_keys(&disk_key, new_key) <= 0);
Zheng Yan31840ae2008-09-23 13:14:14 -04001941 }
1942
1943 btrfs_cpu_key_to_disk(&disk_key, new_key);
1944 btrfs_set_item_key(eb, &disk_key, slot);
1945 btrfs_mark_buffer_dirty(eb);
1946 if (slot == 0)
1947 fixup_low_keys(trans, root, path, &disk_key, 1);
Zheng Yan31840ae2008-09-23 13:14:14 -04001948}
1949
1950/*
Chris Mason74123bd2007-02-02 11:05:29 -05001951 * try to push data from one node into the next node left in the
Chris Mason79f95c82007-03-01 15:16:26 -05001952 * tree.
Chris Masonaa5d6be2007-02-28 16:35:06 -05001953 *
1954 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1955 * error, and > 0 if there was no room in the left hand block.
Chris Mason74123bd2007-02-02 11:05:29 -05001956 */
Chris Mason98ed5172008-01-03 10:01:48 -05001957static int push_node_left(struct btrfs_trans_handle *trans,
1958 struct btrfs_root *root, struct extent_buffer *dst,
Chris Mason971a1f62008-04-24 10:54:32 -04001959 struct extent_buffer *src, int empty)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001960{
Chris Masonbe0e5c02007-01-26 15:51:26 -05001961 int push_items = 0;
Chris Masonbb803952007-03-01 12:04:21 -05001962 int src_nritems;
1963 int dst_nritems;
Chris Masonaa5d6be2007-02-28 16:35:06 -05001964 int ret = 0;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001965
Chris Mason5f39d392007-10-15 16:14:19 -04001966 src_nritems = btrfs_header_nritems(src);
1967 dst_nritems = btrfs_header_nritems(dst);
Chris Mason123abc82007-03-14 14:14:43 -04001968 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
Chris Mason7bb86312007-12-11 09:25:06 -05001969 WARN_ON(btrfs_header_generation(src) != trans->transid);
1970 WARN_ON(btrfs_header_generation(dst) != trans->transid);
Chris Mason54aa1f42007-06-22 14:16:25 -04001971
Chris Masonbce4eae2008-04-24 14:42:46 -04001972 if (!empty && src_nritems <= 8)
Chris Mason971a1f62008-04-24 10:54:32 -04001973 return 1;
1974
Chris Masond3977122009-01-05 21:25:51 -05001975 if (push_items <= 0)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001976 return 1;
1977
Chris Masonbce4eae2008-04-24 14:42:46 -04001978 if (empty) {
Chris Mason971a1f62008-04-24 10:54:32 -04001979 push_items = min(src_nritems, push_items);
Chris Masonbce4eae2008-04-24 14:42:46 -04001980 if (push_items < src_nritems) {
1981 /* leave at least 8 pointers in the node if
1982 * we aren't going to empty it
1983 */
1984 if (src_nritems - push_items < 8) {
1985 if (push_items <= 8)
1986 return 1;
1987 push_items -= 8;
1988 }
1989 }
1990 } else
1991 push_items = min(src_nritems - 8, push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05001992
Chris Mason5f39d392007-10-15 16:14:19 -04001993 copy_extent_buffer(dst, src,
1994 btrfs_node_key_ptr_offset(dst_nritems),
1995 btrfs_node_key_ptr_offset(0),
Chris Masond3977122009-01-05 21:25:51 -05001996 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason5f39d392007-10-15 16:14:19 -04001997
Chris Masonbb803952007-03-01 12:04:21 -05001998 if (push_items < src_nritems) {
Chris Mason5f39d392007-10-15 16:14:19 -04001999 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
2000 btrfs_node_key_ptr_offset(push_items),
2001 (src_nritems - push_items) *
2002 sizeof(struct btrfs_key_ptr));
Chris Masonbb803952007-03-01 12:04:21 -05002003 }
Chris Mason5f39d392007-10-15 16:14:19 -04002004 btrfs_set_header_nritems(src, src_nritems - push_items);
2005 btrfs_set_header_nritems(dst, dst_nritems + push_items);
2006 btrfs_mark_buffer_dirty(src);
2007 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04002008
Chris Masonbb803952007-03-01 12:04:21 -05002009 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002010}
2011
Chris Mason97571fd2007-02-24 13:39:08 -05002012/*
Chris Mason79f95c82007-03-01 15:16:26 -05002013 * try to push data from one node into the next node right in the
2014 * tree.
2015 *
2016 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
2017 * error, and > 0 if there was no room in the right hand block.
2018 *
2019 * this will only push up to 1/2 the contents of the left node over
2020 */
Chris Mason5f39d392007-10-15 16:14:19 -04002021static int balance_node_right(struct btrfs_trans_handle *trans,
2022 struct btrfs_root *root,
2023 struct extent_buffer *dst,
2024 struct extent_buffer *src)
Chris Mason79f95c82007-03-01 15:16:26 -05002025{
Chris Mason79f95c82007-03-01 15:16:26 -05002026 int push_items = 0;
2027 int max_push;
2028 int src_nritems;
2029 int dst_nritems;
2030 int ret = 0;
Chris Mason79f95c82007-03-01 15:16:26 -05002031
Chris Mason7bb86312007-12-11 09:25:06 -05002032 WARN_ON(btrfs_header_generation(src) != trans->transid);
2033 WARN_ON(btrfs_header_generation(dst) != trans->transid);
2034
Chris Mason5f39d392007-10-15 16:14:19 -04002035 src_nritems = btrfs_header_nritems(src);
2036 dst_nritems = btrfs_header_nritems(dst);
Chris Mason123abc82007-03-14 14:14:43 -04002037 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
Chris Masond3977122009-01-05 21:25:51 -05002038 if (push_items <= 0)
Chris Mason79f95c82007-03-01 15:16:26 -05002039 return 1;
Chris Masonbce4eae2008-04-24 14:42:46 -04002040
Chris Masond3977122009-01-05 21:25:51 -05002041 if (src_nritems < 4)
Chris Masonbce4eae2008-04-24 14:42:46 -04002042 return 1;
Chris Mason79f95c82007-03-01 15:16:26 -05002043
2044 max_push = src_nritems / 2 + 1;
2045 /* don't try to empty the node */
Chris Masond3977122009-01-05 21:25:51 -05002046 if (max_push >= src_nritems)
Chris Mason79f95c82007-03-01 15:16:26 -05002047 return 1;
Yan252c38f2007-08-29 09:11:44 -04002048
Chris Mason79f95c82007-03-01 15:16:26 -05002049 if (max_push < push_items)
2050 push_items = max_push;
2051
Chris Mason5f39d392007-10-15 16:14:19 -04002052 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
2053 btrfs_node_key_ptr_offset(0),
2054 (dst_nritems) *
2055 sizeof(struct btrfs_key_ptr));
Chris Masond6025572007-03-30 14:27:56 -04002056
Chris Mason5f39d392007-10-15 16:14:19 -04002057 copy_extent_buffer(dst, src,
2058 btrfs_node_key_ptr_offset(0),
2059 btrfs_node_key_ptr_offset(src_nritems - push_items),
Chris Masond3977122009-01-05 21:25:51 -05002060 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason79f95c82007-03-01 15:16:26 -05002061
Chris Mason5f39d392007-10-15 16:14:19 -04002062 btrfs_set_header_nritems(src, src_nritems - push_items);
2063 btrfs_set_header_nritems(dst, dst_nritems + push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05002064
Chris Mason5f39d392007-10-15 16:14:19 -04002065 btrfs_mark_buffer_dirty(src);
2066 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04002067
Chris Mason79f95c82007-03-01 15:16:26 -05002068 return ret;
2069}
2070
2071/*
Chris Mason97571fd2007-02-24 13:39:08 -05002072 * helper function to insert a new root level in the tree.
2073 * A new node is allocated, and a single item is inserted to
2074 * point to the existing root
Chris Masonaa5d6be2007-02-28 16:35:06 -05002075 *
2076 * returns zero on success or < 0 on failure.
Chris Mason97571fd2007-02-24 13:39:08 -05002077 */
Chris Masond3977122009-01-05 21:25:51 -05002078static noinline int insert_new_root(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04002079 struct btrfs_root *root,
2080 struct btrfs_path *path, int level)
Chris Mason5c680ed2007-02-22 11:39:13 -05002081{
Chris Mason7bb86312007-12-11 09:25:06 -05002082 u64 lower_gen;
Chris Mason5f39d392007-10-15 16:14:19 -04002083 struct extent_buffer *lower;
2084 struct extent_buffer *c;
Chris Mason925baed2008-06-25 16:01:30 -04002085 struct extent_buffer *old;
Chris Mason5f39d392007-10-15 16:14:19 -04002086 struct btrfs_disk_key lower_key;
Chris Mason5c680ed2007-02-22 11:39:13 -05002087
2088 BUG_ON(path->nodes[level]);
2089 BUG_ON(path->nodes[level-1] != root->node);
2090
Chris Mason7bb86312007-12-11 09:25:06 -05002091 lower = path->nodes[level-1];
2092 if (level == 1)
2093 btrfs_item_key(lower, &lower_key, 0);
2094 else
2095 btrfs_node_key(lower, &lower_key, 0);
2096
Zheng Yan31840ae2008-09-23 13:14:14 -04002097 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002098 root->root_key.objectid, &lower_key,
Arne Jansen66d7e7f2011-09-12 15:26:38 +02002099 level, root->node->start, 0, 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002100 if (IS_ERR(c))
2101 return PTR_ERR(c);
Chris Mason925baed2008-06-25 16:01:30 -04002102
Yan, Zhengf0486c62010-05-16 10:46:25 -04002103 root_add_used(root, root->nodesize);
2104
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002105 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
Chris Mason5f39d392007-10-15 16:14:19 -04002106 btrfs_set_header_nritems(c, 1);
2107 btrfs_set_header_level(c, level);
Chris Masondb945352007-10-15 16:15:53 -04002108 btrfs_set_header_bytenr(c, c->start);
Chris Mason5f39d392007-10-15 16:14:19 -04002109 btrfs_set_header_generation(c, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002110 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
Chris Mason5f39d392007-10-15 16:14:19 -04002111 btrfs_set_header_owner(c, root->root_key.objectid);
Chris Masond5719762007-03-23 10:01:08 -04002112
Chris Mason5f39d392007-10-15 16:14:19 -04002113 write_extent_buffer(c, root->fs_info->fsid,
2114 (unsigned long)btrfs_header_fsid(c),
2115 BTRFS_FSID_SIZE);
Chris Masone17cade2008-04-15 15:41:47 -04002116
2117 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
2118 (unsigned long)btrfs_header_chunk_tree_uuid(c),
2119 BTRFS_UUID_SIZE);
2120
Chris Mason5f39d392007-10-15 16:14:19 -04002121 btrfs_set_node_key(c, &lower_key, 0);
Chris Masondb945352007-10-15 16:15:53 -04002122 btrfs_set_node_blockptr(c, 0, lower->start);
Chris Mason7bb86312007-12-11 09:25:06 -05002123 lower_gen = btrfs_header_generation(lower);
Zheng Yan31840ae2008-09-23 13:14:14 -04002124 WARN_ON(lower_gen != trans->transid);
Chris Mason7bb86312007-12-11 09:25:06 -05002125
2126 btrfs_set_node_ptr_generation(c, 0, lower_gen);
Chris Mason5f39d392007-10-15 16:14:19 -04002127
2128 btrfs_mark_buffer_dirty(c);
Chris Masond5719762007-03-23 10:01:08 -04002129
Chris Mason925baed2008-06-25 16:01:30 -04002130 old = root->node;
Chris Mason240f62c2011-03-23 14:54:42 -04002131 rcu_assign_pointer(root->node, c);
Chris Mason925baed2008-06-25 16:01:30 -04002132
2133 /* the super has an extra ref to root->node */
2134 free_extent_buffer(old);
2135
Chris Mason0b86a832008-03-24 15:01:56 -04002136 add_root_to_dirty_list(root);
Chris Mason5f39d392007-10-15 16:14:19 -04002137 extent_buffer_get(c);
2138 path->nodes[level] = c;
Chris Masonbd681512011-07-16 15:23:14 -04002139 path->locks[level] = BTRFS_WRITE_LOCK;
Chris Mason5c680ed2007-02-22 11:39:13 -05002140 path->slots[level] = 0;
2141 return 0;
2142}
2143
Chris Mason74123bd2007-02-02 11:05:29 -05002144/*
2145 * worker function to insert a single pointer in a node.
2146 * the node should have enough room for the pointer already
Chris Mason97571fd2007-02-24 13:39:08 -05002147 *
Chris Mason74123bd2007-02-02 11:05:29 -05002148 * slot and level indicate where you want the key to go, and
2149 * blocknr is the block the key points to.
2150 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01002151static void insert_ptr(struct btrfs_trans_handle *trans,
2152 struct btrfs_root *root, struct btrfs_path *path,
2153 struct btrfs_disk_key *key, u64 bytenr,
2154 int slot, int level)
Chris Mason74123bd2007-02-02 11:05:29 -05002155{
Chris Mason5f39d392007-10-15 16:14:19 -04002156 struct extent_buffer *lower;
Chris Mason74123bd2007-02-02 11:05:29 -05002157 int nritems;
Chris Mason5c680ed2007-02-22 11:39:13 -05002158
2159 BUG_ON(!path->nodes[level]);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002160 btrfs_assert_tree_locked(path->nodes[level]);
Chris Mason5f39d392007-10-15 16:14:19 -04002161 lower = path->nodes[level];
2162 nritems = btrfs_header_nritems(lower);
Stoyan Gaydarovc2934982009-04-02 17:05:11 -04002163 BUG_ON(slot > nritems);
Jeff Mahoney143bede2012-03-01 14:56:26 +01002164 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
Chris Mason74123bd2007-02-02 11:05:29 -05002165 if (slot != nritems) {
Chris Mason5f39d392007-10-15 16:14:19 -04002166 memmove_extent_buffer(lower,
2167 btrfs_node_key_ptr_offset(slot + 1),
2168 btrfs_node_key_ptr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04002169 (nritems - slot) * sizeof(struct btrfs_key_ptr));
Chris Mason74123bd2007-02-02 11:05:29 -05002170 }
Chris Mason5f39d392007-10-15 16:14:19 -04002171 btrfs_set_node_key(lower, key, slot);
Chris Masondb945352007-10-15 16:15:53 -04002172 btrfs_set_node_blockptr(lower, slot, bytenr);
Chris Mason74493f72007-12-11 09:25:06 -05002173 WARN_ON(trans->transid == 0);
2174 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04002175 btrfs_set_header_nritems(lower, nritems + 1);
2176 btrfs_mark_buffer_dirty(lower);
Chris Mason74123bd2007-02-02 11:05:29 -05002177}
2178
Chris Mason97571fd2007-02-24 13:39:08 -05002179/*
2180 * split the node at the specified level in path in two.
2181 * The path is corrected to point to the appropriate node after the split
2182 *
2183 * Before splitting this tries to make some room in the node by pushing
2184 * left and right, if either one works, it returns right away.
Chris Masonaa5d6be2007-02-28 16:35:06 -05002185 *
2186 * returns 0 on success and < 0 on failure
Chris Mason97571fd2007-02-24 13:39:08 -05002187 */
Chris Masone02119d2008-09-05 16:13:11 -04002188static noinline int split_node(struct btrfs_trans_handle *trans,
2189 struct btrfs_root *root,
2190 struct btrfs_path *path, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002191{
Chris Mason5f39d392007-10-15 16:14:19 -04002192 struct extent_buffer *c;
2193 struct extent_buffer *split;
2194 struct btrfs_disk_key disk_key;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002195 int mid;
Chris Mason5c680ed2007-02-22 11:39:13 -05002196 int ret;
Chris Mason7518a232007-03-12 12:01:18 -04002197 u32 c_nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002198
Chris Mason5f39d392007-10-15 16:14:19 -04002199 c = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05002200 WARN_ON(btrfs_header_generation(c) != trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04002201 if (c == root->node) {
Chris Mason5c680ed2007-02-22 11:39:13 -05002202 /* trying to split the root, lets make a new one */
Chris Masone089f052007-03-16 16:20:31 -04002203 ret = insert_new_root(trans, root, path, level + 1);
Chris Mason5c680ed2007-02-22 11:39:13 -05002204 if (ret)
2205 return ret;
Chris Masonb3612422009-05-13 19:12:15 -04002206 } else {
Chris Masone66f7092007-04-20 13:16:02 -04002207 ret = push_nodes_for_insert(trans, root, path, level);
Chris Mason5f39d392007-10-15 16:14:19 -04002208 c = path->nodes[level];
2209 if (!ret && btrfs_header_nritems(c) <
Chris Masonc448acf2008-04-24 09:34:34 -04002210 BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
Chris Masone66f7092007-04-20 13:16:02 -04002211 return 0;
Chris Mason54aa1f42007-06-22 14:16:25 -04002212 if (ret < 0)
2213 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002214 }
Chris Masone66f7092007-04-20 13:16:02 -04002215
Chris Mason5f39d392007-10-15 16:14:19 -04002216 c_nritems = btrfs_header_nritems(c);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002217 mid = (c_nritems + 1) / 2;
2218 btrfs_node_key(c, &disk_key, mid);
Chris Mason7bb86312007-12-11 09:25:06 -05002219
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002220 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
Zheng Yan31840ae2008-09-23 13:14:14 -04002221 root->root_key.objectid,
Arne Jansen66d7e7f2011-09-12 15:26:38 +02002222 &disk_key, level, c->start, 0, 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002223 if (IS_ERR(split))
2224 return PTR_ERR(split);
Chris Mason54aa1f42007-06-22 14:16:25 -04002225
Yan, Zhengf0486c62010-05-16 10:46:25 -04002226 root_add_used(root, root->nodesize);
2227
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002228 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
Chris Mason5f39d392007-10-15 16:14:19 -04002229 btrfs_set_header_level(split, btrfs_header_level(c));
Chris Masondb945352007-10-15 16:15:53 -04002230 btrfs_set_header_bytenr(split, split->start);
Chris Mason5f39d392007-10-15 16:14:19 -04002231 btrfs_set_header_generation(split, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002232 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
Chris Mason5f39d392007-10-15 16:14:19 -04002233 btrfs_set_header_owner(split, root->root_key.objectid);
2234 write_extent_buffer(split, root->fs_info->fsid,
2235 (unsigned long)btrfs_header_fsid(split),
2236 BTRFS_FSID_SIZE);
Chris Masone17cade2008-04-15 15:41:47 -04002237 write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
2238 (unsigned long)btrfs_header_chunk_tree_uuid(split),
2239 BTRFS_UUID_SIZE);
Chris Mason5f39d392007-10-15 16:14:19 -04002240
Chris Mason5f39d392007-10-15 16:14:19 -04002241
2242 copy_extent_buffer(split, c,
2243 btrfs_node_key_ptr_offset(0),
2244 btrfs_node_key_ptr_offset(mid),
2245 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
2246 btrfs_set_header_nritems(split, c_nritems - mid);
2247 btrfs_set_header_nritems(c, mid);
Chris Masonaa5d6be2007-02-28 16:35:06 -05002248 ret = 0;
2249
Chris Mason5f39d392007-10-15 16:14:19 -04002250 btrfs_mark_buffer_dirty(c);
2251 btrfs_mark_buffer_dirty(split);
2252
Jeff Mahoney143bede2012-03-01 14:56:26 +01002253 insert_ptr(trans, root, path, &disk_key, split->start,
2254 path->slots[level + 1] + 1, level + 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05002255
Chris Mason5de08d72007-02-24 06:24:44 -05002256 if (path->slots[level] >= mid) {
Chris Mason5c680ed2007-02-22 11:39:13 -05002257 path->slots[level] -= mid;
Chris Mason925baed2008-06-25 16:01:30 -04002258 btrfs_tree_unlock(c);
Chris Mason5f39d392007-10-15 16:14:19 -04002259 free_extent_buffer(c);
2260 path->nodes[level] = split;
Chris Mason5c680ed2007-02-22 11:39:13 -05002261 path->slots[level + 1] += 1;
2262 } else {
Chris Mason925baed2008-06-25 16:01:30 -04002263 btrfs_tree_unlock(split);
Chris Mason5f39d392007-10-15 16:14:19 -04002264 free_extent_buffer(split);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002265 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05002266 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002267}
2268
Chris Mason74123bd2007-02-02 11:05:29 -05002269/*
2270 * how many bytes are required to store the items in a leaf. start
2271 * and nr indicate which items in the leaf to check. This totals up the
2272 * space used both by the item structs and the item data
2273 */
Chris Mason5f39d392007-10-15 16:14:19 -04002274static int leaf_space_used(struct extent_buffer *l, int start, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002275{
2276 int data_len;
Chris Mason5f39d392007-10-15 16:14:19 -04002277 int nritems = btrfs_header_nritems(l);
Chris Masond4dbff92007-04-04 14:08:15 -04002278 int end = min(nritems, start + nr) - 1;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002279
2280 if (!nr)
2281 return 0;
Chris Mason5f39d392007-10-15 16:14:19 -04002282 data_len = btrfs_item_end_nr(l, start);
2283 data_len = data_len - btrfs_item_offset_nr(l, end);
Chris Mason0783fcf2007-03-12 20:12:07 -04002284 data_len += sizeof(struct btrfs_item) * nr;
Chris Masond4dbff92007-04-04 14:08:15 -04002285 WARN_ON(data_len < 0);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002286 return data_len;
2287}
2288
Chris Mason74123bd2007-02-02 11:05:29 -05002289/*
Chris Masond4dbff92007-04-04 14:08:15 -04002290 * The space between the end of the leaf items and
2291 * the start of the leaf data. IOW, how much room
2292 * the leaf has left for both items and data
2293 */
Chris Masond3977122009-01-05 21:25:51 -05002294noinline int btrfs_leaf_free_space(struct btrfs_root *root,
Chris Masone02119d2008-09-05 16:13:11 -04002295 struct extent_buffer *leaf)
Chris Masond4dbff92007-04-04 14:08:15 -04002296{
Chris Mason5f39d392007-10-15 16:14:19 -04002297 int nritems = btrfs_header_nritems(leaf);
2298 int ret;
2299 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
2300 if (ret < 0) {
Chris Masond3977122009-01-05 21:25:51 -05002301 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
2302 "used %d nritems %d\n",
Jens Axboeae2f5412007-10-19 09:22:59 -04002303 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
Chris Mason5f39d392007-10-15 16:14:19 -04002304 leaf_space_used(leaf, 0, nritems), nritems);
2305 }
2306 return ret;
Chris Masond4dbff92007-04-04 14:08:15 -04002307}
2308
Chris Mason99d8f832010-07-07 10:51:48 -04002309/*
2310 * min slot controls the lowest index we're willing to push to the
2311 * right. We'll push up to and including min_slot, but no lower
2312 */
Chris Mason44871b12009-03-13 10:04:31 -04002313static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2314 struct btrfs_root *root,
2315 struct btrfs_path *path,
2316 int data_size, int empty,
2317 struct extent_buffer *right,
Chris Mason99d8f832010-07-07 10:51:48 -04002318 int free_space, u32 left_nritems,
2319 u32 min_slot)
Chris Mason00ec4c52007-02-24 12:47:20 -05002320{
Chris Mason5f39d392007-10-15 16:14:19 -04002321 struct extent_buffer *left = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04002322 struct extent_buffer *upper = path->nodes[1];
Chris Mason5f39d392007-10-15 16:14:19 -04002323 struct btrfs_disk_key disk_key;
Chris Mason00ec4c52007-02-24 12:47:20 -05002324 int slot;
Chris Mason34a38212007-11-07 13:31:03 -05002325 u32 i;
Chris Mason00ec4c52007-02-24 12:47:20 -05002326 int push_space = 0;
2327 int push_items = 0;
Chris Mason0783fcf2007-03-12 20:12:07 -04002328 struct btrfs_item *item;
Chris Mason34a38212007-11-07 13:31:03 -05002329 u32 nr;
Chris Mason7518a232007-03-12 12:01:18 -04002330 u32 right_nritems;
Chris Mason5f39d392007-10-15 16:14:19 -04002331 u32 data_end;
Chris Masondb945352007-10-15 16:15:53 -04002332 u32 this_item_size;
Chris Mason00ec4c52007-02-24 12:47:20 -05002333
Chris Mason34a38212007-11-07 13:31:03 -05002334 if (empty)
2335 nr = 0;
2336 else
Chris Mason99d8f832010-07-07 10:51:48 -04002337 nr = max_t(u32, 1, min_slot);
Chris Mason34a38212007-11-07 13:31:03 -05002338
Zheng Yan31840ae2008-09-23 13:14:14 -04002339 if (path->slots[0] >= left_nritems)
Yan Zheng87b29b22008-12-17 10:21:48 -05002340 push_space += data_size;
Zheng Yan31840ae2008-09-23 13:14:14 -04002341
Chris Mason44871b12009-03-13 10:04:31 -04002342 slot = path->slots[1];
Chris Mason34a38212007-11-07 13:31:03 -05002343 i = left_nritems - 1;
2344 while (i >= nr) {
Chris Mason5f39d392007-10-15 16:14:19 -04002345 item = btrfs_item_nr(left, i);
Chris Masondb945352007-10-15 16:15:53 -04002346
Zheng Yan31840ae2008-09-23 13:14:14 -04002347 if (!empty && push_items > 0) {
2348 if (path->slots[0] > i)
2349 break;
2350 if (path->slots[0] == i) {
2351 int space = btrfs_leaf_free_space(root, left);
2352 if (space + push_space * 2 > free_space)
2353 break;
2354 }
2355 }
2356
Chris Mason00ec4c52007-02-24 12:47:20 -05002357 if (path->slots[0] == i)
Yan Zheng87b29b22008-12-17 10:21:48 -05002358 push_space += data_size;
Chris Masondb945352007-10-15 16:15:53 -04002359
Chris Masondb945352007-10-15 16:15:53 -04002360 this_item_size = btrfs_item_size(left, item);
2361 if (this_item_size + sizeof(*item) + push_space > free_space)
Chris Mason00ec4c52007-02-24 12:47:20 -05002362 break;
Zheng Yan31840ae2008-09-23 13:14:14 -04002363
Chris Mason00ec4c52007-02-24 12:47:20 -05002364 push_items++;
Chris Masondb945352007-10-15 16:15:53 -04002365 push_space += this_item_size + sizeof(*item);
Chris Mason34a38212007-11-07 13:31:03 -05002366 if (i == 0)
2367 break;
2368 i--;
Chris Masondb945352007-10-15 16:15:53 -04002369 }
Chris Mason5f39d392007-10-15 16:14:19 -04002370
Chris Mason925baed2008-06-25 16:01:30 -04002371 if (push_items == 0)
2372 goto out_unlock;
Chris Mason5f39d392007-10-15 16:14:19 -04002373
Chris Mason34a38212007-11-07 13:31:03 -05002374 if (!empty && push_items == left_nritems)
Chris Masona429e512007-04-18 16:15:28 -04002375 WARN_ON(1);
Chris Mason5f39d392007-10-15 16:14:19 -04002376
Chris Mason00ec4c52007-02-24 12:47:20 -05002377 /* push left to right */
Chris Mason5f39d392007-10-15 16:14:19 -04002378 right_nritems = btrfs_header_nritems(right);
Chris Mason34a38212007-11-07 13:31:03 -05002379
Chris Mason5f39d392007-10-15 16:14:19 -04002380 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
Chris Mason123abc82007-03-14 14:14:43 -04002381 push_space -= leaf_data_end(root, left);
Chris Mason5f39d392007-10-15 16:14:19 -04002382
Chris Mason00ec4c52007-02-24 12:47:20 -05002383 /* make room in the right data area */
Chris Mason5f39d392007-10-15 16:14:19 -04002384 data_end = leaf_data_end(root, right);
2385 memmove_extent_buffer(right,
2386 btrfs_leaf_data(right) + data_end - push_space,
2387 btrfs_leaf_data(right) + data_end,
2388 BTRFS_LEAF_DATA_SIZE(root) - data_end);
2389
Chris Mason00ec4c52007-02-24 12:47:20 -05002390 /* copy from the left data area */
Chris Mason5f39d392007-10-15 16:14:19 -04002391 copy_extent_buffer(right, left, btrfs_leaf_data(right) +
Chris Masond6025572007-03-30 14:27:56 -04002392 BTRFS_LEAF_DATA_SIZE(root) - push_space,
2393 btrfs_leaf_data(left) + leaf_data_end(root, left),
2394 push_space);
Chris Mason5f39d392007-10-15 16:14:19 -04002395
2396 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
2397 btrfs_item_nr_offset(0),
2398 right_nritems * sizeof(struct btrfs_item));
2399
Chris Mason00ec4c52007-02-24 12:47:20 -05002400 /* copy the items from left to right */
Chris Mason5f39d392007-10-15 16:14:19 -04002401 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
2402 btrfs_item_nr_offset(left_nritems - push_items),
2403 push_items * sizeof(struct btrfs_item));
Chris Mason00ec4c52007-02-24 12:47:20 -05002404
2405 /* update the item pointers */
Chris Mason7518a232007-03-12 12:01:18 -04002406 right_nritems += push_items;
Chris Mason5f39d392007-10-15 16:14:19 -04002407 btrfs_set_header_nritems(right, right_nritems);
Chris Mason123abc82007-03-14 14:14:43 -04002408 push_space = BTRFS_LEAF_DATA_SIZE(root);
Chris Mason7518a232007-03-12 12:01:18 -04002409 for (i = 0; i < right_nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04002410 item = btrfs_item_nr(right, i);
Chris Masondb945352007-10-15 16:15:53 -04002411 push_space -= btrfs_item_size(right, item);
2412 btrfs_set_item_offset(right, item, push_space);
2413 }
2414
Chris Mason7518a232007-03-12 12:01:18 -04002415 left_nritems -= push_items;
Chris Mason5f39d392007-10-15 16:14:19 -04002416 btrfs_set_header_nritems(left, left_nritems);
Chris Mason00ec4c52007-02-24 12:47:20 -05002417
Chris Mason34a38212007-11-07 13:31:03 -05002418 if (left_nritems)
2419 btrfs_mark_buffer_dirty(left);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002420 else
2421 clean_tree_block(trans, root, left);
2422
Chris Mason5f39d392007-10-15 16:14:19 -04002423 btrfs_mark_buffer_dirty(right);
Chris Masona429e512007-04-18 16:15:28 -04002424
Chris Mason5f39d392007-10-15 16:14:19 -04002425 btrfs_item_key(right, &disk_key, 0);
2426 btrfs_set_node_key(upper, &disk_key, slot + 1);
Chris Masond6025572007-03-30 14:27:56 -04002427 btrfs_mark_buffer_dirty(upper);
Chris Mason02217ed2007-03-02 16:08:05 -05002428
Chris Mason00ec4c52007-02-24 12:47:20 -05002429 /* then fixup the leaf pointer in the path */
Chris Mason7518a232007-03-12 12:01:18 -04002430 if (path->slots[0] >= left_nritems) {
2431 path->slots[0] -= left_nritems;
Chris Mason925baed2008-06-25 16:01:30 -04002432 if (btrfs_header_nritems(path->nodes[0]) == 0)
2433 clean_tree_block(trans, root, path->nodes[0]);
2434 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04002435 free_extent_buffer(path->nodes[0]);
2436 path->nodes[0] = right;
Chris Mason00ec4c52007-02-24 12:47:20 -05002437 path->slots[1] += 1;
2438 } else {
Chris Mason925baed2008-06-25 16:01:30 -04002439 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002440 free_extent_buffer(right);
Chris Mason00ec4c52007-02-24 12:47:20 -05002441 }
2442 return 0;
Chris Mason925baed2008-06-25 16:01:30 -04002443
2444out_unlock:
2445 btrfs_tree_unlock(right);
2446 free_extent_buffer(right);
2447 return 1;
Chris Mason00ec4c52007-02-24 12:47:20 -05002448}
Chris Mason925baed2008-06-25 16:01:30 -04002449
Chris Mason00ec4c52007-02-24 12:47:20 -05002450/*
Chris Mason44871b12009-03-13 10:04:31 -04002451 * push some data in the path leaf to the right, trying to free up at
2452 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2453 *
2454 * returns 1 if the push failed because the other node didn't have enough
2455 * room, 0 if everything worked out and < 0 if there were major errors.
Chris Mason99d8f832010-07-07 10:51:48 -04002456 *
2457 * this will push starting from min_slot to the end of the leaf. It won't
2458 * push any slot lower than min_slot
Chris Mason44871b12009-03-13 10:04:31 -04002459 */
2460static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Mason99d8f832010-07-07 10:51:48 -04002461 *root, struct btrfs_path *path,
2462 int min_data_size, int data_size,
2463 int empty, u32 min_slot)
Chris Mason44871b12009-03-13 10:04:31 -04002464{
2465 struct extent_buffer *left = path->nodes[0];
2466 struct extent_buffer *right;
2467 struct extent_buffer *upper;
2468 int slot;
2469 int free_space;
2470 u32 left_nritems;
2471 int ret;
2472
2473 if (!path->nodes[1])
2474 return 1;
2475
2476 slot = path->slots[1];
2477 upper = path->nodes[1];
2478 if (slot >= btrfs_header_nritems(upper) - 1)
2479 return 1;
2480
2481 btrfs_assert_tree_locked(path->nodes[1]);
2482
2483 right = read_node_slot(root, upper, slot + 1);
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00002484 if (right == NULL)
2485 return 1;
2486
Chris Mason44871b12009-03-13 10:04:31 -04002487 btrfs_tree_lock(right);
2488 btrfs_set_lock_blocking(right);
2489
2490 free_space = btrfs_leaf_free_space(root, right);
2491 if (free_space < data_size)
2492 goto out_unlock;
2493
2494 /* cow and double check */
2495 ret = btrfs_cow_block(trans, root, right, upper,
2496 slot + 1, &right);
2497 if (ret)
2498 goto out_unlock;
2499
2500 free_space = btrfs_leaf_free_space(root, right);
2501 if (free_space < data_size)
2502 goto out_unlock;
2503
2504 left_nritems = btrfs_header_nritems(left);
2505 if (left_nritems == 0)
2506 goto out_unlock;
2507
Chris Mason99d8f832010-07-07 10:51:48 -04002508 return __push_leaf_right(trans, root, path, min_data_size, empty,
2509 right, free_space, left_nritems, min_slot);
Chris Mason44871b12009-03-13 10:04:31 -04002510out_unlock:
2511 btrfs_tree_unlock(right);
2512 free_extent_buffer(right);
2513 return 1;
2514}
2515
2516/*
Chris Mason74123bd2007-02-02 11:05:29 -05002517 * push some data in the path leaf to the left, trying to free up at
2518 * least data_size bytes. returns zero if the push worked, nonzero otherwise
Chris Mason99d8f832010-07-07 10:51:48 -04002519 *
2520 * max_slot can put a limit on how far into the leaf we'll push items. The
2521 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
2522 * items
Chris Mason74123bd2007-02-02 11:05:29 -05002523 */
Chris Mason44871b12009-03-13 10:04:31 -04002524static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2525 struct btrfs_root *root,
2526 struct btrfs_path *path, int data_size,
2527 int empty, struct extent_buffer *left,
Chris Mason99d8f832010-07-07 10:51:48 -04002528 int free_space, u32 right_nritems,
2529 u32 max_slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002530{
Chris Mason5f39d392007-10-15 16:14:19 -04002531 struct btrfs_disk_key disk_key;
2532 struct extent_buffer *right = path->nodes[0];
Chris Masonbe0e5c02007-01-26 15:51:26 -05002533 int i;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002534 int push_space = 0;
2535 int push_items = 0;
Chris Mason0783fcf2007-03-12 20:12:07 -04002536 struct btrfs_item *item;
Chris Mason7518a232007-03-12 12:01:18 -04002537 u32 old_left_nritems;
Chris Mason34a38212007-11-07 13:31:03 -05002538 u32 nr;
Chris Masonaa5d6be2007-02-28 16:35:06 -05002539 int ret = 0;
Chris Masondb945352007-10-15 16:15:53 -04002540 u32 this_item_size;
2541 u32 old_left_item_size;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002542
Chris Mason34a38212007-11-07 13:31:03 -05002543 if (empty)
Chris Mason99d8f832010-07-07 10:51:48 -04002544 nr = min(right_nritems, max_slot);
Chris Mason34a38212007-11-07 13:31:03 -05002545 else
Chris Mason99d8f832010-07-07 10:51:48 -04002546 nr = min(right_nritems - 1, max_slot);
Chris Mason34a38212007-11-07 13:31:03 -05002547
2548 for (i = 0; i < nr; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04002549 item = btrfs_item_nr(right, i);
Chris Masondb945352007-10-15 16:15:53 -04002550
Zheng Yan31840ae2008-09-23 13:14:14 -04002551 if (!empty && push_items > 0) {
2552 if (path->slots[0] < i)
2553 break;
2554 if (path->slots[0] == i) {
2555 int space = btrfs_leaf_free_space(root, right);
2556 if (space + push_space * 2 > free_space)
2557 break;
2558 }
2559 }
2560
Chris Masonbe0e5c02007-01-26 15:51:26 -05002561 if (path->slots[0] == i)
Yan Zheng87b29b22008-12-17 10:21:48 -05002562 push_space += data_size;
Chris Masondb945352007-10-15 16:15:53 -04002563
2564 this_item_size = btrfs_item_size(right, item);
2565 if (this_item_size + sizeof(*item) + push_space > free_space)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002566 break;
Chris Masondb945352007-10-15 16:15:53 -04002567
Chris Masonbe0e5c02007-01-26 15:51:26 -05002568 push_items++;
Chris Masondb945352007-10-15 16:15:53 -04002569 push_space += this_item_size + sizeof(*item);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002570 }
Chris Masondb945352007-10-15 16:15:53 -04002571
Chris Masonbe0e5c02007-01-26 15:51:26 -05002572 if (push_items == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04002573 ret = 1;
2574 goto out;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002575 }
Chris Mason34a38212007-11-07 13:31:03 -05002576 if (!empty && push_items == btrfs_header_nritems(right))
Chris Masona429e512007-04-18 16:15:28 -04002577 WARN_ON(1);
Chris Mason5f39d392007-10-15 16:14:19 -04002578
Chris Masonbe0e5c02007-01-26 15:51:26 -05002579 /* push data from right to left */
Chris Mason5f39d392007-10-15 16:14:19 -04002580 copy_extent_buffer(left, right,
2581 btrfs_item_nr_offset(btrfs_header_nritems(left)),
2582 btrfs_item_nr_offset(0),
2583 push_items * sizeof(struct btrfs_item));
2584
Chris Mason123abc82007-03-14 14:14:43 -04002585 push_space = BTRFS_LEAF_DATA_SIZE(root) -
Chris Masond3977122009-01-05 21:25:51 -05002586 btrfs_item_offset_nr(right, push_items - 1);
Chris Mason5f39d392007-10-15 16:14:19 -04002587
2588 copy_extent_buffer(left, right, btrfs_leaf_data(left) +
Chris Masond6025572007-03-30 14:27:56 -04002589 leaf_data_end(root, left) - push_space,
2590 btrfs_leaf_data(right) +
Chris Mason5f39d392007-10-15 16:14:19 -04002591 btrfs_item_offset_nr(right, push_items - 1),
Chris Masond6025572007-03-30 14:27:56 -04002592 push_space);
Chris Mason5f39d392007-10-15 16:14:19 -04002593 old_left_nritems = btrfs_header_nritems(left);
Yan Zheng87b29b22008-12-17 10:21:48 -05002594 BUG_ON(old_left_nritems <= 0);
Chris Masoneb60cea2007-02-02 09:18:22 -05002595
Chris Masondb945352007-10-15 16:15:53 -04002596 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
Chris Mason0783fcf2007-03-12 20:12:07 -04002597 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04002598 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04002599
Chris Mason5f39d392007-10-15 16:14:19 -04002600 item = btrfs_item_nr(left, i);
Chris Masondb945352007-10-15 16:15:53 -04002601
Chris Mason5f39d392007-10-15 16:14:19 -04002602 ioff = btrfs_item_offset(left, item);
2603 btrfs_set_item_offset(left, item,
Chris Masondb945352007-10-15 16:15:53 -04002604 ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
Chris Masonbe0e5c02007-01-26 15:51:26 -05002605 }
Chris Mason5f39d392007-10-15 16:14:19 -04002606 btrfs_set_header_nritems(left, old_left_nritems + push_items);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002607
2608 /* fixup right node */
Chris Mason34a38212007-11-07 13:31:03 -05002609 if (push_items > right_nritems) {
Chris Masond3977122009-01-05 21:25:51 -05002610 printk(KERN_CRIT "push items %d nr %u\n", push_items,
2611 right_nritems);
Chris Mason34a38212007-11-07 13:31:03 -05002612 WARN_ON(1);
2613 }
Chris Mason5f39d392007-10-15 16:14:19 -04002614
Chris Mason34a38212007-11-07 13:31:03 -05002615 if (push_items < right_nritems) {
2616 push_space = btrfs_item_offset_nr(right, push_items - 1) -
2617 leaf_data_end(root, right);
2618 memmove_extent_buffer(right, btrfs_leaf_data(right) +
2619 BTRFS_LEAF_DATA_SIZE(root) - push_space,
2620 btrfs_leaf_data(right) +
2621 leaf_data_end(root, right), push_space);
2622
2623 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
Chris Mason5f39d392007-10-15 16:14:19 -04002624 btrfs_item_nr_offset(push_items),
2625 (btrfs_header_nritems(right) - push_items) *
2626 sizeof(struct btrfs_item));
Chris Mason34a38212007-11-07 13:31:03 -05002627 }
Yaneef1c492007-11-26 10:58:13 -05002628 right_nritems -= push_items;
2629 btrfs_set_header_nritems(right, right_nritems);
Chris Mason123abc82007-03-14 14:14:43 -04002630 push_space = BTRFS_LEAF_DATA_SIZE(root);
Chris Mason5f39d392007-10-15 16:14:19 -04002631 for (i = 0; i < right_nritems; i++) {
2632 item = btrfs_item_nr(right, i);
Chris Masondb945352007-10-15 16:15:53 -04002633
Chris Masondb945352007-10-15 16:15:53 -04002634 push_space = push_space - btrfs_item_size(right, item);
2635 btrfs_set_item_offset(right, item, push_space);
2636 }
Chris Masoneb60cea2007-02-02 09:18:22 -05002637
Chris Mason5f39d392007-10-15 16:14:19 -04002638 btrfs_mark_buffer_dirty(left);
Chris Mason34a38212007-11-07 13:31:03 -05002639 if (right_nritems)
2640 btrfs_mark_buffer_dirty(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002641 else
2642 clean_tree_block(trans, root, right);
Chris Mason098f59c2007-05-11 11:33:21 -04002643
Chris Mason5f39d392007-10-15 16:14:19 -04002644 btrfs_item_key(right, &disk_key, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01002645 fixup_low_keys(trans, root, path, &disk_key, 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002646
2647 /* then fixup the leaf pointer in the path */
2648 if (path->slots[0] < push_items) {
2649 path->slots[0] += old_left_nritems;
Chris Mason925baed2008-06-25 16:01:30 -04002650 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04002651 free_extent_buffer(path->nodes[0]);
2652 path->nodes[0] = left;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002653 path->slots[1] -= 1;
2654 } else {
Chris Mason925baed2008-06-25 16:01:30 -04002655 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002656 free_extent_buffer(left);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002657 path->slots[0] -= push_items;
2658 }
Chris Masoneb60cea2007-02-02 09:18:22 -05002659 BUG_ON(path->slots[0] < 0);
Chris Masonaa5d6be2007-02-28 16:35:06 -05002660 return ret;
Chris Mason925baed2008-06-25 16:01:30 -04002661out:
2662 btrfs_tree_unlock(left);
2663 free_extent_buffer(left);
2664 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002665}
2666
Chris Mason74123bd2007-02-02 11:05:29 -05002667/*
Chris Mason44871b12009-03-13 10:04:31 -04002668 * push some data in the path leaf to the left, trying to free up at
2669 * least data_size bytes. returns zero if the push worked, nonzero otherwise
Chris Mason99d8f832010-07-07 10:51:48 -04002670 *
2671 * max_slot can put a limit on how far into the leaf we'll push items. The
2672 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
2673 * items
Chris Mason44871b12009-03-13 10:04:31 -04002674 */
2675static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Mason99d8f832010-07-07 10:51:48 -04002676 *root, struct btrfs_path *path, int min_data_size,
2677 int data_size, int empty, u32 max_slot)
Chris Mason44871b12009-03-13 10:04:31 -04002678{
2679 struct extent_buffer *right = path->nodes[0];
2680 struct extent_buffer *left;
2681 int slot;
2682 int free_space;
2683 u32 right_nritems;
2684 int ret = 0;
2685
2686 slot = path->slots[1];
2687 if (slot == 0)
2688 return 1;
2689 if (!path->nodes[1])
2690 return 1;
2691
2692 right_nritems = btrfs_header_nritems(right);
2693 if (right_nritems == 0)
2694 return 1;
2695
2696 btrfs_assert_tree_locked(path->nodes[1]);
2697
2698 left = read_node_slot(root, path->nodes[1], slot - 1);
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00002699 if (left == NULL)
2700 return 1;
2701
Chris Mason44871b12009-03-13 10:04:31 -04002702 btrfs_tree_lock(left);
2703 btrfs_set_lock_blocking(left);
2704
2705 free_space = btrfs_leaf_free_space(root, left);
2706 if (free_space < data_size) {
2707 ret = 1;
2708 goto out;
2709 }
2710
2711 /* cow and double check */
2712 ret = btrfs_cow_block(trans, root, left,
2713 path->nodes[1], slot - 1, &left);
2714 if (ret) {
2715 /* we hit -ENOSPC, but it isn't fatal here */
2716 ret = 1;
2717 goto out;
2718 }
2719
2720 free_space = btrfs_leaf_free_space(root, left);
2721 if (free_space < data_size) {
2722 ret = 1;
2723 goto out;
2724 }
2725
Chris Mason99d8f832010-07-07 10:51:48 -04002726 return __push_leaf_left(trans, root, path, min_data_size,
2727 empty, left, free_space, right_nritems,
2728 max_slot);
Chris Mason44871b12009-03-13 10:04:31 -04002729out:
2730 btrfs_tree_unlock(left);
2731 free_extent_buffer(left);
2732 return ret;
2733}
2734
2735/*
Chris Mason74123bd2007-02-02 11:05:29 -05002736 * split the path's leaf in two, making sure there is at least data_size
2737 * available for the resulting leaf level of the path.
2738 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01002739static noinline void copy_for_split(struct btrfs_trans_handle *trans,
2740 struct btrfs_root *root,
2741 struct btrfs_path *path,
2742 struct extent_buffer *l,
2743 struct extent_buffer *right,
2744 int slot, int mid, int nritems)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002745{
Chris Masonbe0e5c02007-01-26 15:51:26 -05002746 int data_copy_size;
2747 int rt_data_off;
2748 int i;
Chris Masond4dbff92007-04-04 14:08:15 -04002749 struct btrfs_disk_key disk_key;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002750
Chris Mason5f39d392007-10-15 16:14:19 -04002751 nritems = nritems - mid;
2752 btrfs_set_header_nritems(right, nritems);
2753 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2754
2755 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2756 btrfs_item_nr_offset(mid),
2757 nritems * sizeof(struct btrfs_item));
2758
2759 copy_extent_buffer(right, l,
Chris Masond6025572007-03-30 14:27:56 -04002760 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2761 data_copy_size, btrfs_leaf_data(l) +
2762 leaf_data_end(root, l), data_copy_size);
Chris Mason74123bd2007-02-02 11:05:29 -05002763
Chris Mason5f39d392007-10-15 16:14:19 -04002764 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2765 btrfs_item_end_nr(l, mid);
2766
2767 for (i = 0; i < nritems; i++) {
2768 struct btrfs_item *item = btrfs_item_nr(right, i);
Chris Masondb945352007-10-15 16:15:53 -04002769 u32 ioff;
2770
Chris Masondb945352007-10-15 16:15:53 -04002771 ioff = btrfs_item_offset(right, item);
Chris Mason5f39d392007-10-15 16:14:19 -04002772 btrfs_set_item_offset(right, item, ioff + rt_data_off);
Chris Mason0783fcf2007-03-12 20:12:07 -04002773 }
Chris Mason74123bd2007-02-02 11:05:29 -05002774
Chris Mason5f39d392007-10-15 16:14:19 -04002775 btrfs_set_header_nritems(l, mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002776 btrfs_item_key(right, &disk_key, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01002777 insert_ptr(trans, root, path, &disk_key, right->start,
2778 path->slots[1] + 1, 1);
Chris Mason5f39d392007-10-15 16:14:19 -04002779
2780 btrfs_mark_buffer_dirty(right);
2781 btrfs_mark_buffer_dirty(l);
Chris Masoneb60cea2007-02-02 09:18:22 -05002782 BUG_ON(path->slots[0] != slot);
Chris Mason5f39d392007-10-15 16:14:19 -04002783
Chris Masonbe0e5c02007-01-26 15:51:26 -05002784 if (mid <= slot) {
Chris Mason925baed2008-06-25 16:01:30 -04002785 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04002786 free_extent_buffer(path->nodes[0]);
2787 path->nodes[0] = right;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002788 path->slots[0] -= mid;
2789 path->slots[1] += 1;
Chris Mason925baed2008-06-25 16:01:30 -04002790 } else {
2791 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002792 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04002793 }
Chris Mason5f39d392007-10-15 16:14:19 -04002794
Chris Masoneb60cea2007-02-02 09:18:22 -05002795 BUG_ON(path->slots[0] < 0);
Chris Mason44871b12009-03-13 10:04:31 -04002796}
2797
2798/*
Chris Mason99d8f832010-07-07 10:51:48 -04002799 * double splits happen when we need to insert a big item in the middle
2800 * of a leaf. A double split can leave us with 3 mostly empty leaves:
2801 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
2802 * A B C
2803 *
2804 * We avoid this by trying to push the items on either side of our target
2805 * into the adjacent leaves. If all goes well we can avoid the double split
2806 * completely.
2807 */
2808static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
2809 struct btrfs_root *root,
2810 struct btrfs_path *path,
2811 int data_size)
2812{
2813 int ret;
2814 int progress = 0;
2815 int slot;
2816 u32 nritems;
2817
2818 slot = path->slots[0];
2819
2820 /*
2821 * try to push all the items after our slot into the
2822 * right leaf
2823 */
2824 ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
2825 if (ret < 0)
2826 return ret;
2827
2828 if (ret == 0)
2829 progress++;
2830
2831 nritems = btrfs_header_nritems(path->nodes[0]);
2832 /*
2833 * our goal is to get our slot at the start or end of a leaf. If
2834 * we've done so we're done
2835 */
2836 if (path->slots[0] == 0 || path->slots[0] == nritems)
2837 return 0;
2838
2839 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
2840 return 0;
2841
2842 /* try to push all the items before our slot into the next leaf */
2843 slot = path->slots[0];
2844 ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
2845 if (ret < 0)
2846 return ret;
2847
2848 if (ret == 0)
2849 progress++;
2850
2851 if (progress)
2852 return 0;
2853 return 1;
2854}
2855
2856/*
Chris Mason44871b12009-03-13 10:04:31 -04002857 * split the path's leaf in two, making sure there is at least data_size
2858 * available for the resulting leaf level of the path.
2859 *
2860 * returns 0 if all went well and < 0 on failure.
2861 */
2862static noinline int split_leaf(struct btrfs_trans_handle *trans,
2863 struct btrfs_root *root,
2864 struct btrfs_key *ins_key,
2865 struct btrfs_path *path, int data_size,
2866 int extend)
2867{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002868 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04002869 struct extent_buffer *l;
2870 u32 nritems;
2871 int mid;
2872 int slot;
2873 struct extent_buffer *right;
2874 int ret = 0;
2875 int wret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002876 int split;
Chris Mason44871b12009-03-13 10:04:31 -04002877 int num_doubles = 0;
Chris Mason99d8f832010-07-07 10:51:48 -04002878 int tried_avoid_double = 0;
Chris Mason44871b12009-03-13 10:04:31 -04002879
Yan, Zhenga5719522009-09-24 09:17:31 -04002880 l = path->nodes[0];
2881 slot = path->slots[0];
2882 if (extend && data_size + btrfs_item_size_nr(l, slot) +
2883 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
2884 return -EOVERFLOW;
2885
Chris Mason44871b12009-03-13 10:04:31 -04002886 /* first try to make some room by pushing left and right */
Chris Mason99d8f832010-07-07 10:51:48 -04002887 if (data_size) {
2888 wret = push_leaf_right(trans, root, path, data_size,
2889 data_size, 0, 0);
Chris Mason44871b12009-03-13 10:04:31 -04002890 if (wret < 0)
2891 return wret;
2892 if (wret) {
Chris Mason99d8f832010-07-07 10:51:48 -04002893 wret = push_leaf_left(trans, root, path, data_size,
2894 data_size, 0, (u32)-1);
Chris Mason44871b12009-03-13 10:04:31 -04002895 if (wret < 0)
2896 return wret;
2897 }
2898 l = path->nodes[0];
2899
2900 /* did the pushes work? */
2901 if (btrfs_leaf_free_space(root, l) >= data_size)
2902 return 0;
2903 }
2904
2905 if (!path->nodes[1]) {
2906 ret = insert_new_root(trans, root, path, 1);
2907 if (ret)
2908 return ret;
2909 }
2910again:
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002911 split = 1;
Chris Mason44871b12009-03-13 10:04:31 -04002912 l = path->nodes[0];
2913 slot = path->slots[0];
2914 nritems = btrfs_header_nritems(l);
2915 mid = (nritems + 1) / 2;
2916
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002917 if (mid <= slot) {
2918 if (nritems == 1 ||
2919 leaf_space_used(l, mid, nritems - mid) + data_size >
2920 BTRFS_LEAF_DATA_SIZE(root)) {
2921 if (slot >= nritems) {
2922 split = 0;
2923 } else {
2924 mid = slot;
2925 if (mid != nritems &&
2926 leaf_space_used(l, mid, nritems - mid) +
2927 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
Chris Mason99d8f832010-07-07 10:51:48 -04002928 if (data_size && !tried_avoid_double)
2929 goto push_for_double;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002930 split = 2;
2931 }
2932 }
2933 }
2934 } else {
2935 if (leaf_space_used(l, 0, mid) + data_size >
2936 BTRFS_LEAF_DATA_SIZE(root)) {
2937 if (!extend && data_size && slot == 0) {
2938 split = 0;
2939 } else if ((extend || !data_size) && slot == 0) {
2940 mid = 1;
2941 } else {
2942 mid = slot;
2943 if (mid != nritems &&
2944 leaf_space_used(l, mid, nritems - mid) +
2945 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
Chris Mason99d8f832010-07-07 10:51:48 -04002946 if (data_size && !tried_avoid_double)
2947 goto push_for_double;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002948 split = 2 ;
2949 }
2950 }
2951 }
2952 }
2953
2954 if (split == 0)
2955 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2956 else
2957 btrfs_item_key(l, &disk_key, mid);
2958
2959 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
Chris Mason44871b12009-03-13 10:04:31 -04002960 root->root_key.objectid,
Arne Jansen66d7e7f2011-09-12 15:26:38 +02002961 &disk_key, 0, l->start, 0, 0);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002962 if (IS_ERR(right))
Chris Mason44871b12009-03-13 10:04:31 -04002963 return PTR_ERR(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002964
2965 root_add_used(root, root->leafsize);
Chris Mason44871b12009-03-13 10:04:31 -04002966
2967 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2968 btrfs_set_header_bytenr(right, right->start);
2969 btrfs_set_header_generation(right, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002970 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
Chris Mason44871b12009-03-13 10:04:31 -04002971 btrfs_set_header_owner(right, root->root_key.objectid);
2972 btrfs_set_header_level(right, 0);
2973 write_extent_buffer(right, root->fs_info->fsid,
2974 (unsigned long)btrfs_header_fsid(right),
2975 BTRFS_FSID_SIZE);
2976
2977 write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
2978 (unsigned long)btrfs_header_chunk_tree_uuid(right),
2979 BTRFS_UUID_SIZE);
2980
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002981 if (split == 0) {
2982 if (mid <= slot) {
2983 btrfs_set_header_nritems(right, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01002984 insert_ptr(trans, root, path, &disk_key, right->start,
2985 path->slots[1] + 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002986 btrfs_tree_unlock(path->nodes[0]);
2987 free_extent_buffer(path->nodes[0]);
2988 path->nodes[0] = right;
2989 path->slots[0] = 0;
2990 path->slots[1] += 1;
2991 } else {
2992 btrfs_set_header_nritems(right, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01002993 insert_ptr(trans, root, path, &disk_key, right->start,
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002994 path->slots[1], 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002995 btrfs_tree_unlock(path->nodes[0]);
2996 free_extent_buffer(path->nodes[0]);
2997 path->nodes[0] = right;
2998 path->slots[0] = 0;
Jeff Mahoney143bede2012-03-01 14:56:26 +01002999 if (path->slots[1] == 0)
3000 fixup_low_keys(trans, root, path,
3001 &disk_key, 1);
Chris Mason44871b12009-03-13 10:04:31 -04003002 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003003 btrfs_mark_buffer_dirty(right);
3004 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04003005 }
3006
Jeff Mahoney143bede2012-03-01 14:56:26 +01003007 copy_for_split(trans, root, path, l, right, slot, mid, nritems);
Chris Mason44871b12009-03-13 10:04:31 -04003008
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003009 if (split == 2) {
Chris Masoncc0c5532007-10-25 15:42:57 -04003010 BUG_ON(num_doubles != 0);
3011 num_doubles++;
3012 goto again;
Chris Mason3326d1b2007-10-15 16:18:25 -04003013 }
Chris Mason44871b12009-03-13 10:04:31 -04003014
Jeff Mahoney143bede2012-03-01 14:56:26 +01003015 return 0;
Chris Mason99d8f832010-07-07 10:51:48 -04003016
3017push_for_double:
3018 push_for_double_split(trans, root, path, data_size);
3019 tried_avoid_double = 1;
3020 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
3021 return 0;
3022 goto again;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003023}
3024
Yan, Zhengad48fd752009-11-12 09:33:58 +00003025static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3026 struct btrfs_root *root,
3027 struct btrfs_path *path, int ins_len)
Chris Mason459931e2008-12-10 09:10:46 -05003028{
Yan, Zhengad48fd752009-11-12 09:33:58 +00003029 struct btrfs_key key;
Chris Mason459931e2008-12-10 09:10:46 -05003030 struct extent_buffer *leaf;
Yan, Zhengad48fd752009-11-12 09:33:58 +00003031 struct btrfs_file_extent_item *fi;
3032 u64 extent_len = 0;
3033 u32 item_size;
3034 int ret;
Chris Mason459931e2008-12-10 09:10:46 -05003035
3036 leaf = path->nodes[0];
Yan, Zhengad48fd752009-11-12 09:33:58 +00003037 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3038
3039 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3040 key.type != BTRFS_EXTENT_CSUM_KEY);
3041
3042 if (btrfs_leaf_free_space(root, leaf) >= ins_len)
3043 return 0;
Chris Mason459931e2008-12-10 09:10:46 -05003044
3045 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003046 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3047 fi = btrfs_item_ptr(leaf, path->slots[0],
3048 struct btrfs_file_extent_item);
3049 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3050 }
David Sterbab3b4aa72011-04-21 01:20:15 +02003051 btrfs_release_path(path);
Chris Mason459931e2008-12-10 09:10:46 -05003052
Chris Mason459931e2008-12-10 09:10:46 -05003053 path->keep_locks = 1;
Yan, Zhengad48fd752009-11-12 09:33:58 +00003054 path->search_for_split = 1;
3055 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
Chris Mason459931e2008-12-10 09:10:46 -05003056 path->search_for_split = 0;
Yan, Zhengad48fd752009-11-12 09:33:58 +00003057 if (ret < 0)
3058 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05003059
Yan, Zhengad48fd752009-11-12 09:33:58 +00003060 ret = -EAGAIN;
3061 leaf = path->nodes[0];
Chris Mason459931e2008-12-10 09:10:46 -05003062 /* if our item isn't there or got smaller, return now */
Yan, Zhengad48fd752009-11-12 09:33:58 +00003063 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3064 goto err;
3065
Chris Mason109f6ae2010-04-02 09:20:18 -04003066 /* the leaf has changed, it now has room. return now */
3067 if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
3068 goto err;
3069
Yan, Zhengad48fd752009-11-12 09:33:58 +00003070 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3071 fi = btrfs_item_ptr(leaf, path->slots[0],
3072 struct btrfs_file_extent_item);
3073 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3074 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05003075 }
3076
Chris Masonb9473432009-03-13 11:00:37 -04003077 btrfs_set_path_blocking(path);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003078 ret = split_leaf(trans, root, &key, path, ins_len, 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003079 if (ret)
3080 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05003081
Yan, Zhengad48fd752009-11-12 09:33:58 +00003082 path->keep_locks = 0;
Chris Masonb9473432009-03-13 11:00:37 -04003083 btrfs_unlock_up_safe(path, 1);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003084 return 0;
3085err:
3086 path->keep_locks = 0;
3087 return ret;
3088}
3089
3090static noinline int split_item(struct btrfs_trans_handle *trans,
3091 struct btrfs_root *root,
3092 struct btrfs_path *path,
3093 struct btrfs_key *new_key,
3094 unsigned long split_offset)
3095{
3096 struct extent_buffer *leaf;
3097 struct btrfs_item *item;
3098 struct btrfs_item *new_item;
3099 int slot;
3100 char *buf;
3101 u32 nritems;
3102 u32 item_size;
3103 u32 orig_offset;
3104 struct btrfs_disk_key disk_key;
3105
Chris Masonb9473432009-03-13 11:00:37 -04003106 leaf = path->nodes[0];
3107 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3108
Chris Masonb4ce94d2009-02-04 09:25:08 -05003109 btrfs_set_path_blocking(path);
3110
Chris Mason459931e2008-12-10 09:10:46 -05003111 item = btrfs_item_nr(leaf, path->slots[0]);
3112 orig_offset = btrfs_item_offset(leaf, item);
3113 item_size = btrfs_item_size(leaf, item);
3114
Chris Mason459931e2008-12-10 09:10:46 -05003115 buf = kmalloc(item_size, GFP_NOFS);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003116 if (!buf)
3117 return -ENOMEM;
3118
Chris Mason459931e2008-12-10 09:10:46 -05003119 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3120 path->slots[0]), item_size);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003121
Chris Mason459931e2008-12-10 09:10:46 -05003122 slot = path->slots[0] + 1;
Chris Mason459931e2008-12-10 09:10:46 -05003123 nritems = btrfs_header_nritems(leaf);
Chris Mason459931e2008-12-10 09:10:46 -05003124 if (slot != nritems) {
3125 /* shift the items */
3126 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
Yan, Zhengad48fd752009-11-12 09:33:58 +00003127 btrfs_item_nr_offset(slot),
3128 (nritems - slot) * sizeof(struct btrfs_item));
Chris Mason459931e2008-12-10 09:10:46 -05003129 }
3130
3131 btrfs_cpu_key_to_disk(&disk_key, new_key);
3132 btrfs_set_item_key(leaf, &disk_key, slot);
3133
3134 new_item = btrfs_item_nr(leaf, slot);
3135
3136 btrfs_set_item_offset(leaf, new_item, orig_offset);
3137 btrfs_set_item_size(leaf, new_item, item_size - split_offset);
3138
3139 btrfs_set_item_offset(leaf, item,
3140 orig_offset + item_size - split_offset);
3141 btrfs_set_item_size(leaf, item, split_offset);
3142
3143 btrfs_set_header_nritems(leaf, nritems + 1);
3144
3145 /* write the data for the start of the original item */
3146 write_extent_buffer(leaf, buf,
3147 btrfs_item_ptr_offset(leaf, path->slots[0]),
3148 split_offset);
3149
3150 /* write the data for the new item */
3151 write_extent_buffer(leaf, buf + split_offset,
3152 btrfs_item_ptr_offset(leaf, slot),
3153 item_size - split_offset);
3154 btrfs_mark_buffer_dirty(leaf);
3155
Yan, Zhengad48fd752009-11-12 09:33:58 +00003156 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
Chris Mason459931e2008-12-10 09:10:46 -05003157 kfree(buf);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003158 return 0;
3159}
3160
3161/*
3162 * This function splits a single item into two items,
3163 * giving 'new_key' to the new item and splitting the
3164 * old one at split_offset (from the start of the item).
3165 *
3166 * The path may be released by this operation. After
3167 * the split, the path is pointing to the old item. The
3168 * new item is going to be in the same node as the old one.
3169 *
3170 * Note, the item being split must be smaller enough to live alone on
3171 * a tree block with room for one extra struct btrfs_item
3172 *
3173 * This allows us to split the item in place, keeping a lock on the
3174 * leaf the entire time.
3175 */
3176int btrfs_split_item(struct btrfs_trans_handle *trans,
3177 struct btrfs_root *root,
3178 struct btrfs_path *path,
3179 struct btrfs_key *new_key,
3180 unsigned long split_offset)
3181{
3182 int ret;
3183 ret = setup_leaf_for_split(trans, root, path,
3184 sizeof(struct btrfs_item));
3185 if (ret)
3186 return ret;
3187
3188 ret = split_item(trans, root, path, new_key, split_offset);
Chris Mason459931e2008-12-10 09:10:46 -05003189 return ret;
3190}
3191
3192/*
Yan, Zhengad48fd752009-11-12 09:33:58 +00003193 * This function duplicate a item, giving 'new_key' to the new item.
3194 * It guarantees both items live in the same tree leaf and the new item
3195 * is contiguous with the original item.
3196 *
3197 * This allows us to split file extent in place, keeping a lock on the
3198 * leaf the entire time.
3199 */
3200int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3201 struct btrfs_root *root,
3202 struct btrfs_path *path,
3203 struct btrfs_key *new_key)
3204{
3205 struct extent_buffer *leaf;
3206 int ret;
3207 u32 item_size;
3208
3209 leaf = path->nodes[0];
3210 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3211 ret = setup_leaf_for_split(trans, root, path,
3212 item_size + sizeof(struct btrfs_item));
3213 if (ret)
3214 return ret;
3215
3216 path->slots[0]++;
Jeff Mahoney143bede2012-03-01 14:56:26 +01003217 setup_items_for_insert(trans, root, path, new_key, &item_size,
3218 item_size, item_size +
3219 sizeof(struct btrfs_item), 1);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003220 leaf = path->nodes[0];
3221 memcpy_extent_buffer(leaf,
3222 btrfs_item_ptr_offset(leaf, path->slots[0]),
3223 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3224 item_size);
3225 return 0;
3226}
3227
3228/*
Chris Masond352ac62008-09-29 15:18:18 -04003229 * make the item pointed to by the path smaller. new_size indicates
3230 * how small to make it, and from_end tells us if we just chop bytes
3231 * off the end of the item or if we shift the item to chop bytes off
3232 * the front.
3233 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003234void btrfs_truncate_item(struct btrfs_trans_handle *trans,
3235 struct btrfs_root *root,
3236 struct btrfs_path *path,
3237 u32 new_size, int from_end)
Chris Masonb18c6682007-04-17 13:26:50 -04003238{
Chris Masonb18c6682007-04-17 13:26:50 -04003239 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04003240 struct extent_buffer *leaf;
3241 struct btrfs_item *item;
Chris Masonb18c6682007-04-17 13:26:50 -04003242 u32 nritems;
3243 unsigned int data_end;
3244 unsigned int old_data_start;
3245 unsigned int old_size;
3246 unsigned int size_diff;
3247 int i;
3248
Chris Mason5f39d392007-10-15 16:14:19 -04003249 leaf = path->nodes[0];
Chris Mason179e29e2007-11-01 11:28:41 -04003250 slot = path->slots[0];
3251
3252 old_size = btrfs_item_size_nr(leaf, slot);
3253 if (old_size == new_size)
Jeff Mahoney143bede2012-03-01 14:56:26 +01003254 return;
Chris Masonb18c6682007-04-17 13:26:50 -04003255
Chris Mason5f39d392007-10-15 16:14:19 -04003256 nritems = btrfs_header_nritems(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04003257 data_end = leaf_data_end(root, leaf);
3258
Chris Mason5f39d392007-10-15 16:14:19 -04003259 old_data_start = btrfs_item_offset_nr(leaf, slot);
Chris Mason179e29e2007-11-01 11:28:41 -04003260
Chris Masonb18c6682007-04-17 13:26:50 -04003261 size_diff = old_size - new_size;
3262
3263 BUG_ON(slot < 0);
3264 BUG_ON(slot >= nritems);
3265
3266 /*
3267 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3268 */
3269 /* first correct the data pointers */
3270 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003271 u32 ioff;
3272 item = btrfs_item_nr(leaf, i);
Chris Masondb945352007-10-15 16:15:53 -04003273
Chris Mason5f39d392007-10-15 16:14:19 -04003274 ioff = btrfs_item_offset(leaf, item);
3275 btrfs_set_item_offset(leaf, item, ioff + size_diff);
Chris Masonb18c6682007-04-17 13:26:50 -04003276 }
Chris Masondb945352007-10-15 16:15:53 -04003277
Chris Masonb18c6682007-04-17 13:26:50 -04003278 /* shift the data */
Chris Mason179e29e2007-11-01 11:28:41 -04003279 if (from_end) {
3280 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3281 data_end + size_diff, btrfs_leaf_data(leaf) +
3282 data_end, old_data_start + new_size - data_end);
3283 } else {
3284 struct btrfs_disk_key disk_key;
3285 u64 offset;
3286
3287 btrfs_item_key(leaf, &disk_key, slot);
3288
3289 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
3290 unsigned long ptr;
3291 struct btrfs_file_extent_item *fi;
3292
3293 fi = btrfs_item_ptr(leaf, slot,
3294 struct btrfs_file_extent_item);
3295 fi = (struct btrfs_file_extent_item *)(
3296 (unsigned long)fi - size_diff);
3297
3298 if (btrfs_file_extent_type(leaf, fi) ==
3299 BTRFS_FILE_EXTENT_INLINE) {
3300 ptr = btrfs_item_ptr_offset(leaf, slot);
3301 memmove_extent_buffer(leaf, ptr,
Chris Masond3977122009-01-05 21:25:51 -05003302 (unsigned long)fi,
3303 offsetof(struct btrfs_file_extent_item,
Chris Mason179e29e2007-11-01 11:28:41 -04003304 disk_bytenr));
3305 }
3306 }
3307
3308 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3309 data_end + size_diff, btrfs_leaf_data(leaf) +
3310 data_end, old_data_start - data_end);
3311
3312 offset = btrfs_disk_key_offset(&disk_key);
3313 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
3314 btrfs_set_item_key(leaf, &disk_key, slot);
3315 if (slot == 0)
3316 fixup_low_keys(trans, root, path, &disk_key, 1);
3317 }
Chris Mason5f39d392007-10-15 16:14:19 -04003318
3319 item = btrfs_item_nr(leaf, slot);
3320 btrfs_set_item_size(leaf, item, new_size);
3321 btrfs_mark_buffer_dirty(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04003322
Chris Mason5f39d392007-10-15 16:14:19 -04003323 if (btrfs_leaf_free_space(root, leaf) < 0) {
3324 btrfs_print_leaf(root, leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04003325 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003326 }
Chris Masonb18c6682007-04-17 13:26:50 -04003327}
3328
Chris Masond352ac62008-09-29 15:18:18 -04003329/*
3330 * make the item pointed to by the path bigger, data_size is the new size.
3331 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003332void btrfs_extend_item(struct btrfs_trans_handle *trans,
3333 struct btrfs_root *root, struct btrfs_path *path,
3334 u32 data_size)
Chris Mason6567e832007-04-16 09:22:45 -04003335{
Chris Mason6567e832007-04-16 09:22:45 -04003336 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04003337 struct extent_buffer *leaf;
3338 struct btrfs_item *item;
Chris Mason6567e832007-04-16 09:22:45 -04003339 u32 nritems;
3340 unsigned int data_end;
3341 unsigned int old_data;
3342 unsigned int old_size;
3343 int i;
3344
Chris Mason5f39d392007-10-15 16:14:19 -04003345 leaf = path->nodes[0];
Chris Mason6567e832007-04-16 09:22:45 -04003346
Chris Mason5f39d392007-10-15 16:14:19 -04003347 nritems = btrfs_header_nritems(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003348 data_end = leaf_data_end(root, leaf);
3349
Chris Mason5f39d392007-10-15 16:14:19 -04003350 if (btrfs_leaf_free_space(root, leaf) < data_size) {
3351 btrfs_print_leaf(root, leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003352 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003353 }
Chris Mason6567e832007-04-16 09:22:45 -04003354 slot = path->slots[0];
Chris Mason5f39d392007-10-15 16:14:19 -04003355 old_data = btrfs_item_end_nr(leaf, slot);
Chris Mason6567e832007-04-16 09:22:45 -04003356
3357 BUG_ON(slot < 0);
Chris Mason3326d1b2007-10-15 16:18:25 -04003358 if (slot >= nritems) {
3359 btrfs_print_leaf(root, leaf);
Chris Masond3977122009-01-05 21:25:51 -05003360 printk(KERN_CRIT "slot %d too large, nritems %d\n",
3361 slot, nritems);
Chris Mason3326d1b2007-10-15 16:18:25 -04003362 BUG_ON(1);
3363 }
Chris Mason6567e832007-04-16 09:22:45 -04003364
3365 /*
3366 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3367 */
3368 /* first correct the data pointers */
3369 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003370 u32 ioff;
3371 item = btrfs_item_nr(leaf, i);
Chris Masondb945352007-10-15 16:15:53 -04003372
Chris Mason5f39d392007-10-15 16:14:19 -04003373 ioff = btrfs_item_offset(leaf, item);
3374 btrfs_set_item_offset(leaf, item, ioff - data_size);
Chris Mason6567e832007-04-16 09:22:45 -04003375 }
Chris Mason5f39d392007-10-15 16:14:19 -04003376
Chris Mason6567e832007-04-16 09:22:45 -04003377 /* shift the data */
Chris Mason5f39d392007-10-15 16:14:19 -04003378 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
Chris Mason6567e832007-04-16 09:22:45 -04003379 data_end - data_size, btrfs_leaf_data(leaf) +
3380 data_end, old_data - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04003381
Chris Mason6567e832007-04-16 09:22:45 -04003382 data_end = old_data;
Chris Mason5f39d392007-10-15 16:14:19 -04003383 old_size = btrfs_item_size_nr(leaf, slot);
3384 item = btrfs_item_nr(leaf, slot);
3385 btrfs_set_item_size(leaf, item, old_size + data_size);
3386 btrfs_mark_buffer_dirty(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003387
Chris Mason5f39d392007-10-15 16:14:19 -04003388 if (btrfs_leaf_free_space(root, leaf) < 0) {
3389 btrfs_print_leaf(root, leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003390 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003391 }
Chris Mason6567e832007-04-16 09:22:45 -04003392}
3393
Chris Mason74123bd2007-02-02 11:05:29 -05003394/*
Chris Masond352ac62008-09-29 15:18:18 -04003395 * Given a key and some data, insert items into the tree.
Chris Mason74123bd2007-02-02 11:05:29 -05003396 * This does all the path init required, making room in the tree if needed.
Josef Bacikf3465ca2008-11-12 14:19:50 -05003397 * Returns the number of keys that were inserted.
3398 */
3399int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3400 struct btrfs_root *root,
3401 struct btrfs_path *path,
3402 struct btrfs_key *cpu_key, u32 *data_size,
3403 int nr)
3404{
3405 struct extent_buffer *leaf;
3406 struct btrfs_item *item;
3407 int ret = 0;
3408 int slot;
Josef Bacikf3465ca2008-11-12 14:19:50 -05003409 int i;
3410 u32 nritems;
3411 u32 total_data = 0;
3412 u32 total_size = 0;
3413 unsigned int data_end;
3414 struct btrfs_disk_key disk_key;
3415 struct btrfs_key found_key;
3416
Yan Zheng87b29b22008-12-17 10:21:48 -05003417 for (i = 0; i < nr; i++) {
3418 if (total_size + data_size[i] + sizeof(struct btrfs_item) >
3419 BTRFS_LEAF_DATA_SIZE(root)) {
3420 break;
3421 nr = i;
3422 }
Josef Bacikf3465ca2008-11-12 14:19:50 -05003423 total_data += data_size[i];
Yan Zheng87b29b22008-12-17 10:21:48 -05003424 total_size += data_size[i] + sizeof(struct btrfs_item);
3425 }
3426 BUG_ON(nr == 0);
Josef Bacikf3465ca2008-11-12 14:19:50 -05003427
Josef Bacikf3465ca2008-11-12 14:19:50 -05003428 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3429 if (ret == 0)
3430 return -EEXIST;
3431 if (ret < 0)
3432 goto out;
3433
Josef Bacikf3465ca2008-11-12 14:19:50 -05003434 leaf = path->nodes[0];
3435
3436 nritems = btrfs_header_nritems(leaf);
3437 data_end = leaf_data_end(root, leaf);
3438
3439 if (btrfs_leaf_free_space(root, leaf) < total_size) {
3440 for (i = nr; i >= 0; i--) {
3441 total_data -= data_size[i];
3442 total_size -= data_size[i] + sizeof(struct btrfs_item);
3443 if (total_size < btrfs_leaf_free_space(root, leaf))
3444 break;
3445 }
3446 nr = i;
3447 }
3448
3449 slot = path->slots[0];
3450 BUG_ON(slot < 0);
3451
3452 if (slot != nritems) {
3453 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3454
3455 item = btrfs_item_nr(leaf, slot);
3456 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3457
3458 /* figure out how many keys we can insert in here */
3459 total_data = data_size[0];
3460 for (i = 1; i < nr; i++) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003461 if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
Josef Bacikf3465ca2008-11-12 14:19:50 -05003462 break;
3463 total_data += data_size[i];
3464 }
3465 nr = i;
3466
3467 if (old_data < data_end) {
3468 btrfs_print_leaf(root, leaf);
Chris Masond3977122009-01-05 21:25:51 -05003469 printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
Josef Bacikf3465ca2008-11-12 14:19:50 -05003470 slot, old_data, data_end);
3471 BUG_ON(1);
3472 }
3473 /*
3474 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3475 */
3476 /* first correct the data pointers */
Josef Bacikf3465ca2008-11-12 14:19:50 -05003477 for (i = slot; i < nritems; i++) {
3478 u32 ioff;
3479
3480 item = btrfs_item_nr(leaf, i);
Josef Bacikf3465ca2008-11-12 14:19:50 -05003481 ioff = btrfs_item_offset(leaf, item);
3482 btrfs_set_item_offset(leaf, item, ioff - total_data);
3483 }
Josef Bacikf3465ca2008-11-12 14:19:50 -05003484 /* shift the items */
3485 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3486 btrfs_item_nr_offset(slot),
3487 (nritems - slot) * sizeof(struct btrfs_item));
3488
3489 /* shift the data */
3490 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3491 data_end - total_data, btrfs_leaf_data(leaf) +
3492 data_end, old_data - data_end);
3493 data_end = old_data;
3494 } else {
3495 /*
3496 * this sucks but it has to be done, if we are inserting at
3497 * the end of the leaf only insert 1 of the items, since we
3498 * have no way of knowing whats on the next leaf and we'd have
3499 * to drop our current locks to figure it out
3500 */
3501 nr = 1;
3502 }
3503
3504 /* setup the item for the new data */
3505 for (i = 0; i < nr; i++) {
3506 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3507 btrfs_set_item_key(leaf, &disk_key, slot + i);
3508 item = btrfs_item_nr(leaf, slot + i);
3509 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3510 data_end -= data_size[i];
3511 btrfs_set_item_size(leaf, item, data_size[i]);
3512 }
3513 btrfs_set_header_nritems(leaf, nritems + nr);
3514 btrfs_mark_buffer_dirty(leaf);
3515
3516 ret = 0;
3517 if (slot == 0) {
3518 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003519 fixup_low_keys(trans, root, path, &disk_key, 1);
Josef Bacikf3465ca2008-11-12 14:19:50 -05003520 }
3521
3522 if (btrfs_leaf_free_space(root, leaf) < 0) {
3523 btrfs_print_leaf(root, leaf);
3524 BUG();
3525 }
3526out:
3527 if (!ret)
3528 ret = nr;
3529 return ret;
3530}
3531
3532/*
Chris Mason44871b12009-03-13 10:04:31 -04003533 * this is a helper for btrfs_insert_empty_items, the main goal here is
3534 * to save stack depth by doing the bulk of the work in a function
3535 * that doesn't call btrfs_search_slot
Chris Mason74123bd2007-02-02 11:05:29 -05003536 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003537void setup_items_for_insert(struct btrfs_trans_handle *trans,
3538 struct btrfs_root *root, struct btrfs_path *path,
3539 struct btrfs_key *cpu_key, u32 *data_size,
3540 u32 total_data, u32 total_size, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003541{
Chris Mason5f39d392007-10-15 16:14:19 -04003542 struct btrfs_item *item;
Chris Mason9c583092008-01-29 15:15:18 -05003543 int i;
Chris Mason7518a232007-03-12 12:01:18 -04003544 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003545 unsigned int data_end;
Chris Masone2fa7222007-03-12 16:22:34 -04003546 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04003547 struct extent_buffer *leaf;
3548 int slot;
Chris Masone2fa7222007-03-12 16:22:34 -04003549
Chris Mason5f39d392007-10-15 16:14:19 -04003550 leaf = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04003551 slot = path->slots[0];
Chris Mason74123bd2007-02-02 11:05:29 -05003552
Chris Mason5f39d392007-10-15 16:14:19 -04003553 nritems = btrfs_header_nritems(leaf);
Chris Mason123abc82007-03-14 14:14:43 -04003554 data_end = leaf_data_end(root, leaf);
Chris Masoneb60cea2007-02-02 09:18:22 -05003555
Chris Masonf25956c2008-09-12 15:32:53 -04003556 if (btrfs_leaf_free_space(root, leaf) < total_size) {
Chris Mason3326d1b2007-10-15 16:18:25 -04003557 btrfs_print_leaf(root, leaf);
Chris Masond3977122009-01-05 21:25:51 -05003558 printk(KERN_CRIT "not enough freespace need %u have %d\n",
Chris Mason9c583092008-01-29 15:15:18 -05003559 total_size, btrfs_leaf_free_space(root, leaf));
Chris Masonbe0e5c02007-01-26 15:51:26 -05003560 BUG();
Chris Masond4dbff92007-04-04 14:08:15 -04003561 }
Chris Mason5f39d392007-10-15 16:14:19 -04003562
Chris Masonbe0e5c02007-01-26 15:51:26 -05003563 if (slot != nritems) {
Chris Mason5f39d392007-10-15 16:14:19 -04003564 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003565
Chris Mason5f39d392007-10-15 16:14:19 -04003566 if (old_data < data_end) {
3567 btrfs_print_leaf(root, leaf);
Chris Masond3977122009-01-05 21:25:51 -05003568 printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
Chris Mason5f39d392007-10-15 16:14:19 -04003569 slot, old_data, data_end);
3570 BUG_ON(1);
3571 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05003572 /*
3573 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3574 */
3575 /* first correct the data pointers */
Chris Mason0783fcf2007-03-12 20:12:07 -04003576 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003577 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04003578
Chris Mason5f39d392007-10-15 16:14:19 -04003579 item = btrfs_item_nr(leaf, i);
3580 ioff = btrfs_item_offset(leaf, item);
Chris Mason9c583092008-01-29 15:15:18 -05003581 btrfs_set_item_offset(leaf, item, ioff - total_data);
Chris Mason0783fcf2007-03-12 20:12:07 -04003582 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05003583 /* shift the items */
Chris Mason9c583092008-01-29 15:15:18 -05003584 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
Chris Mason5f39d392007-10-15 16:14:19 -04003585 btrfs_item_nr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04003586 (nritems - slot) * sizeof(struct btrfs_item));
Chris Masonbe0e5c02007-01-26 15:51:26 -05003587
3588 /* shift the data */
Chris Mason5f39d392007-10-15 16:14:19 -04003589 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
Chris Mason9c583092008-01-29 15:15:18 -05003590 data_end - total_data, btrfs_leaf_data(leaf) +
Chris Masond6025572007-03-30 14:27:56 -04003591 data_end, old_data - data_end);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003592 data_end = old_data;
3593 }
Chris Mason5f39d392007-10-15 16:14:19 -04003594
Chris Mason62e27492007-03-15 12:56:47 -04003595 /* setup the item for the new data */
Chris Mason9c583092008-01-29 15:15:18 -05003596 for (i = 0; i < nr; i++) {
3597 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3598 btrfs_set_item_key(leaf, &disk_key, slot + i);
3599 item = btrfs_item_nr(leaf, slot + i);
3600 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3601 data_end -= data_size[i];
3602 btrfs_set_item_size(leaf, item, data_size[i]);
3603 }
Chris Mason44871b12009-03-13 10:04:31 -04003604
Chris Mason9c583092008-01-29 15:15:18 -05003605 btrfs_set_header_nritems(leaf, nritems + nr);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003606
Chris Mason5a01a2e2008-01-30 11:43:54 -05003607 if (slot == 0) {
3608 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003609 fixup_low_keys(trans, root, path, &disk_key, 1);
Chris Mason5a01a2e2008-01-30 11:43:54 -05003610 }
Chris Masonb9473432009-03-13 11:00:37 -04003611 btrfs_unlock_up_safe(path, 1);
3612 btrfs_mark_buffer_dirty(leaf);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003613
Chris Mason5f39d392007-10-15 16:14:19 -04003614 if (btrfs_leaf_free_space(root, leaf) < 0) {
3615 btrfs_print_leaf(root, leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003616 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003617 }
Chris Mason44871b12009-03-13 10:04:31 -04003618}
3619
3620/*
3621 * Given a key and some data, insert items into the tree.
3622 * This does all the path init required, making room in the tree if needed.
3623 */
3624int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3625 struct btrfs_root *root,
3626 struct btrfs_path *path,
3627 struct btrfs_key *cpu_key, u32 *data_size,
3628 int nr)
3629{
Chris Mason44871b12009-03-13 10:04:31 -04003630 int ret = 0;
3631 int slot;
3632 int i;
3633 u32 total_size = 0;
3634 u32 total_data = 0;
3635
3636 for (i = 0; i < nr; i++)
3637 total_data += data_size[i];
3638
3639 total_size = total_data + (nr * sizeof(struct btrfs_item));
3640 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3641 if (ret == 0)
3642 return -EEXIST;
3643 if (ret < 0)
Jeff Mahoney143bede2012-03-01 14:56:26 +01003644 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04003645
Chris Mason44871b12009-03-13 10:04:31 -04003646 slot = path->slots[0];
3647 BUG_ON(slot < 0);
3648
Jeff Mahoney143bede2012-03-01 14:56:26 +01003649 setup_items_for_insert(trans, root, path, cpu_key, data_size,
Chris Mason44871b12009-03-13 10:04:31 -04003650 total_data, total_size, nr);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003651 return 0;
Chris Mason62e27492007-03-15 12:56:47 -04003652}
3653
3654/*
3655 * Given a key and some data, insert an item into the tree.
3656 * This does all the path init required, making room in the tree if needed.
3657 */
Chris Masone089f052007-03-16 16:20:31 -04003658int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3659 *root, struct btrfs_key *cpu_key, void *data, u32
3660 data_size)
Chris Mason62e27492007-03-15 12:56:47 -04003661{
3662 int ret = 0;
Chris Mason2c90e5d62007-04-02 10:50:19 -04003663 struct btrfs_path *path;
Chris Mason5f39d392007-10-15 16:14:19 -04003664 struct extent_buffer *leaf;
3665 unsigned long ptr;
Chris Mason62e27492007-03-15 12:56:47 -04003666
Chris Mason2c90e5d62007-04-02 10:50:19 -04003667 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00003668 if (!path)
3669 return -ENOMEM;
Chris Mason2c90e5d62007-04-02 10:50:19 -04003670 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
Chris Mason62e27492007-03-15 12:56:47 -04003671 if (!ret) {
Chris Mason5f39d392007-10-15 16:14:19 -04003672 leaf = path->nodes[0];
3673 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3674 write_extent_buffer(leaf, data, ptr, data_size);
3675 btrfs_mark_buffer_dirty(leaf);
Chris Mason62e27492007-03-15 12:56:47 -04003676 }
Chris Mason2c90e5d62007-04-02 10:50:19 -04003677 btrfs_free_path(path);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003678 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003679}
3680
Chris Mason74123bd2007-02-02 11:05:29 -05003681/*
Chris Mason5de08d72007-02-24 06:24:44 -05003682 * delete the pointer from a given node.
Chris Mason74123bd2007-02-02 11:05:29 -05003683 *
Chris Masond352ac62008-09-29 15:18:18 -04003684 * the tree should have been previously balanced so the deletion does not
3685 * empty a node.
Chris Mason74123bd2007-02-02 11:05:29 -05003686 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003687static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3688 struct btrfs_path *path, int level, int slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003689{
Chris Mason5f39d392007-10-15 16:14:19 -04003690 struct extent_buffer *parent = path->nodes[level];
Chris Mason7518a232007-03-12 12:01:18 -04003691 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003692
Chris Mason5f39d392007-10-15 16:14:19 -04003693 nritems = btrfs_header_nritems(parent);
Chris Masond3977122009-01-05 21:25:51 -05003694 if (slot != nritems - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04003695 memmove_extent_buffer(parent,
3696 btrfs_node_key_ptr_offset(slot),
3697 btrfs_node_key_ptr_offset(slot + 1),
Chris Masond6025572007-03-30 14:27:56 -04003698 sizeof(struct btrfs_key_ptr) *
3699 (nritems - slot - 1));
Chris Masonbb803952007-03-01 12:04:21 -05003700 }
Chris Mason7518a232007-03-12 12:01:18 -04003701 nritems--;
Chris Mason5f39d392007-10-15 16:14:19 -04003702 btrfs_set_header_nritems(parent, nritems);
Chris Mason7518a232007-03-12 12:01:18 -04003703 if (nritems == 0 && parent == root->node) {
Chris Mason5f39d392007-10-15 16:14:19 -04003704 BUG_ON(btrfs_header_level(root->node) != 1);
Chris Masonbb803952007-03-01 12:04:21 -05003705 /* just turn the root into a leaf and break */
Chris Mason5f39d392007-10-15 16:14:19 -04003706 btrfs_set_header_level(root->node, 0);
Chris Masonbb803952007-03-01 12:04:21 -05003707 } else if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04003708 struct btrfs_disk_key disk_key;
3709
3710 btrfs_node_key(parent, &disk_key, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003711 fixup_low_keys(trans, root, path, &disk_key, level + 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003712 }
Chris Masond6025572007-03-30 14:27:56 -04003713 btrfs_mark_buffer_dirty(parent);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003714}
3715
Chris Mason74123bd2007-02-02 11:05:29 -05003716/*
Chris Mason323ac952008-10-01 19:05:46 -04003717 * a helper function to delete the leaf pointed to by path->slots[1] and
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003718 * path->nodes[1].
Chris Mason323ac952008-10-01 19:05:46 -04003719 *
3720 * This deletes the pointer in path->nodes[1] and frees the leaf
3721 * block extent. zero is returned if it all worked out, < 0 otherwise.
3722 *
3723 * The path must have already been setup for deleting the leaf, including
3724 * all the proper balancing. path->nodes[1] must be locked.
3725 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003726static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
3727 struct btrfs_root *root,
3728 struct btrfs_path *path,
3729 struct extent_buffer *leaf)
Chris Mason323ac952008-10-01 19:05:46 -04003730{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003731 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003732 del_ptr(trans, root, path, 1, path->slots[1]);
Chris Mason323ac952008-10-01 19:05:46 -04003733
Chris Mason4d081c42009-02-04 09:31:28 -05003734 /*
3735 * btrfs_free_extent is expensive, we want to make sure we
3736 * aren't holding any locks when we call it
3737 */
3738 btrfs_unlock_up_safe(path, 0);
3739
Yan, Zhengf0486c62010-05-16 10:46:25 -04003740 root_sub_used(root, leaf->len);
3741
Arne Jansen66d7e7f2011-09-12 15:26:38 +02003742 btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
Chris Mason323ac952008-10-01 19:05:46 -04003743}
3744/*
Chris Mason74123bd2007-02-02 11:05:29 -05003745 * delete the item at the leaf level in path. If that empties
3746 * the leaf, remove it from the tree
3747 */
Chris Mason85e21ba2008-01-29 15:11:36 -05003748int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3749 struct btrfs_path *path, int slot, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003750{
Chris Mason5f39d392007-10-15 16:14:19 -04003751 struct extent_buffer *leaf;
3752 struct btrfs_item *item;
Chris Mason85e21ba2008-01-29 15:11:36 -05003753 int last_off;
3754 int dsize = 0;
Chris Masonaa5d6be2007-02-28 16:35:06 -05003755 int ret = 0;
3756 int wret;
Chris Mason85e21ba2008-01-29 15:11:36 -05003757 int i;
Chris Mason7518a232007-03-12 12:01:18 -04003758 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003759
Chris Mason5f39d392007-10-15 16:14:19 -04003760 leaf = path->nodes[0];
Chris Mason85e21ba2008-01-29 15:11:36 -05003761 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
3762
3763 for (i = 0; i < nr; i++)
3764 dsize += btrfs_item_size_nr(leaf, slot + i);
3765
Chris Mason5f39d392007-10-15 16:14:19 -04003766 nritems = btrfs_header_nritems(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003767
Chris Mason85e21ba2008-01-29 15:11:36 -05003768 if (slot + nr != nritems) {
Chris Mason123abc82007-03-14 14:14:43 -04003769 int data_end = leaf_data_end(root, leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04003770
3771 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
Chris Masond6025572007-03-30 14:27:56 -04003772 data_end + dsize,
3773 btrfs_leaf_data(leaf) + data_end,
Chris Mason85e21ba2008-01-29 15:11:36 -05003774 last_off - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04003775
Chris Mason85e21ba2008-01-29 15:11:36 -05003776 for (i = slot + nr; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003777 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04003778
Chris Mason5f39d392007-10-15 16:14:19 -04003779 item = btrfs_item_nr(leaf, i);
3780 ioff = btrfs_item_offset(leaf, item);
3781 btrfs_set_item_offset(leaf, item, ioff + dsize);
Chris Mason0783fcf2007-03-12 20:12:07 -04003782 }
Chris Masondb945352007-10-15 16:15:53 -04003783
Chris Mason5f39d392007-10-15 16:14:19 -04003784 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
Chris Mason85e21ba2008-01-29 15:11:36 -05003785 btrfs_item_nr_offset(slot + nr),
Chris Masond6025572007-03-30 14:27:56 -04003786 sizeof(struct btrfs_item) *
Chris Mason85e21ba2008-01-29 15:11:36 -05003787 (nritems - slot - nr));
Chris Masonbe0e5c02007-01-26 15:51:26 -05003788 }
Chris Mason85e21ba2008-01-29 15:11:36 -05003789 btrfs_set_header_nritems(leaf, nritems - nr);
3790 nritems -= nr;
Chris Mason5f39d392007-10-15 16:14:19 -04003791
Chris Mason74123bd2007-02-02 11:05:29 -05003792 /* delete the leaf if we've emptied it */
Chris Mason7518a232007-03-12 12:01:18 -04003793 if (nritems == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04003794 if (leaf == root->node) {
3795 btrfs_set_header_level(leaf, 0);
Chris Mason9a8dd152007-02-23 08:38:36 -05003796 } else {
Yan, Zhengf0486c62010-05-16 10:46:25 -04003797 btrfs_set_path_blocking(path);
3798 clean_tree_block(trans, root, leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003799 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason9a8dd152007-02-23 08:38:36 -05003800 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05003801 } else {
Chris Mason7518a232007-03-12 12:01:18 -04003802 int used = leaf_space_used(leaf, 0, nritems);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003803 if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04003804 struct btrfs_disk_key disk_key;
3805
3806 btrfs_item_key(leaf, &disk_key, 0);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003807 fixup_low_keys(trans, root, path, &disk_key, 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003808 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05003809
Chris Mason74123bd2007-02-02 11:05:29 -05003810 /* delete the leaf if it is mostly empty */
Yan Zhengd717aa12009-07-24 12:42:46 -04003811 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05003812 /* push_leaf_left fixes the path.
3813 * make sure the path still points to our leaf
3814 * for possible call to del_ptr below
3815 */
Chris Mason4920c9a2007-01-26 16:38:42 -05003816 slot = path->slots[1];
Chris Mason5f39d392007-10-15 16:14:19 -04003817 extent_buffer_get(leaf);
3818
Chris Masonb9473432009-03-13 11:00:37 -04003819 btrfs_set_path_blocking(path);
Chris Mason99d8f832010-07-07 10:51:48 -04003820 wret = push_leaf_left(trans, root, path, 1, 1,
3821 1, (u32)-1);
Chris Mason54aa1f42007-06-22 14:16:25 -04003822 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05003823 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04003824
3825 if (path->nodes[0] == leaf &&
3826 btrfs_header_nritems(leaf)) {
Chris Mason99d8f832010-07-07 10:51:48 -04003827 wret = push_leaf_right(trans, root, path, 1,
3828 1, 1, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04003829 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05003830 ret = wret;
3831 }
Chris Mason5f39d392007-10-15 16:14:19 -04003832
3833 if (btrfs_header_nritems(leaf) == 0) {
Chris Mason323ac952008-10-01 19:05:46 -04003834 path->slots[1] = slot;
Jeff Mahoney143bede2012-03-01 14:56:26 +01003835 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04003836 free_extent_buffer(leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01003837 ret = 0;
Chris Mason5de08d72007-02-24 06:24:44 -05003838 } else {
Chris Mason925baed2008-06-25 16:01:30 -04003839 /* if we're still in the path, make sure
3840 * we're dirty. Otherwise, one of the
3841 * push_leaf functions must have already
3842 * dirtied this buffer
3843 */
3844 if (path->nodes[0] == leaf)
3845 btrfs_mark_buffer_dirty(leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04003846 free_extent_buffer(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003847 }
Chris Masond5719762007-03-23 10:01:08 -04003848 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04003849 btrfs_mark_buffer_dirty(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003850 }
3851 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05003852 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003853}
3854
Chris Mason97571fd2007-02-24 13:39:08 -05003855/*
Chris Mason925baed2008-06-25 16:01:30 -04003856 * search the tree again to find a leaf with lesser keys
Chris Mason7bb86312007-12-11 09:25:06 -05003857 * returns 0 if it found something or 1 if there are no lesser leaves.
3858 * returns < 0 on io errors.
Chris Masond352ac62008-09-29 15:18:18 -04003859 *
3860 * This may release the path, and so you may lose any locks held at the
3861 * time you call it.
Chris Mason7bb86312007-12-11 09:25:06 -05003862 */
3863int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3864{
Chris Mason925baed2008-06-25 16:01:30 -04003865 struct btrfs_key key;
3866 struct btrfs_disk_key found_key;
3867 int ret;
Chris Mason7bb86312007-12-11 09:25:06 -05003868
Chris Mason925baed2008-06-25 16:01:30 -04003869 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
Chris Mason7bb86312007-12-11 09:25:06 -05003870
Chris Mason925baed2008-06-25 16:01:30 -04003871 if (key.offset > 0)
3872 key.offset--;
3873 else if (key.type > 0)
3874 key.type--;
3875 else if (key.objectid > 0)
3876 key.objectid--;
3877 else
3878 return 1;
Chris Mason7bb86312007-12-11 09:25:06 -05003879
David Sterbab3b4aa72011-04-21 01:20:15 +02003880 btrfs_release_path(path);
Chris Mason925baed2008-06-25 16:01:30 -04003881 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3882 if (ret < 0)
3883 return ret;
3884 btrfs_item_key(path->nodes[0], &found_key, 0);
3885 ret = comp_keys(&found_key, &key);
3886 if (ret < 0)
3887 return 0;
3888 return 1;
Chris Mason7bb86312007-12-11 09:25:06 -05003889}
3890
Chris Mason3f157a22008-06-25 16:01:31 -04003891/*
3892 * A helper function to walk down the tree starting at min_key, and looking
3893 * for nodes or leaves that are either in cache or have a minimum
Chris Masond352ac62008-09-29 15:18:18 -04003894 * transaction id. This is used by the btree defrag code, and tree logging
Chris Mason3f157a22008-06-25 16:01:31 -04003895 *
3896 * This does not cow, but it does stuff the starting key it finds back
3897 * into min_key, so you can call btrfs_search_slot with cow=1 on the
3898 * key and get a writable path.
3899 *
3900 * This does lock as it descends, and path->keep_locks should be set
3901 * to 1 by the caller.
3902 *
3903 * This honors path->lowest_level to prevent descent past a given level
3904 * of the tree.
3905 *
Chris Masond352ac62008-09-29 15:18:18 -04003906 * min_trans indicates the oldest transaction that you are interested
3907 * in walking through. Any nodes or leaves older than min_trans are
3908 * skipped over (without reading them).
3909 *
Chris Mason3f157a22008-06-25 16:01:31 -04003910 * returns zero if something useful was found, < 0 on error and 1 if there
3911 * was nothing in the tree that matched the search criteria.
3912 */
3913int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
Chris Masone02119d2008-09-05 16:13:11 -04003914 struct btrfs_key *max_key,
Chris Mason3f157a22008-06-25 16:01:31 -04003915 struct btrfs_path *path, int cache_only,
3916 u64 min_trans)
3917{
3918 struct extent_buffer *cur;
3919 struct btrfs_key found_key;
3920 int slot;
Yan96524802008-07-24 12:19:49 -04003921 int sret;
Chris Mason3f157a22008-06-25 16:01:31 -04003922 u32 nritems;
3923 int level;
3924 int ret = 1;
3925
Chris Mason934d3752008-12-08 16:43:10 -05003926 WARN_ON(!path->keep_locks);
Chris Mason3f157a22008-06-25 16:01:31 -04003927again:
Chris Masonbd681512011-07-16 15:23:14 -04003928 cur = btrfs_read_lock_root_node(root);
Chris Mason3f157a22008-06-25 16:01:31 -04003929 level = btrfs_header_level(cur);
Chris Masone02119d2008-09-05 16:13:11 -04003930 WARN_ON(path->nodes[level]);
Chris Mason3f157a22008-06-25 16:01:31 -04003931 path->nodes[level] = cur;
Chris Masonbd681512011-07-16 15:23:14 -04003932 path->locks[level] = BTRFS_READ_LOCK;
Chris Mason3f157a22008-06-25 16:01:31 -04003933
3934 if (btrfs_header_generation(cur) < min_trans) {
3935 ret = 1;
3936 goto out;
3937 }
Chris Masond3977122009-01-05 21:25:51 -05003938 while (1) {
Chris Mason3f157a22008-06-25 16:01:31 -04003939 nritems = btrfs_header_nritems(cur);
3940 level = btrfs_header_level(cur);
Yan96524802008-07-24 12:19:49 -04003941 sret = bin_search(cur, min_key, level, &slot);
Chris Mason3f157a22008-06-25 16:01:31 -04003942
Chris Mason323ac952008-10-01 19:05:46 -04003943 /* at the lowest level, we're done, setup the path and exit */
3944 if (level == path->lowest_level) {
Chris Masone02119d2008-09-05 16:13:11 -04003945 if (slot >= nritems)
3946 goto find_next_key;
Chris Mason3f157a22008-06-25 16:01:31 -04003947 ret = 0;
3948 path->slots[level] = slot;
3949 btrfs_item_key_to_cpu(cur, &found_key, slot);
3950 goto out;
3951 }
Yan96524802008-07-24 12:19:49 -04003952 if (sret && slot > 0)
3953 slot--;
Chris Mason3f157a22008-06-25 16:01:31 -04003954 /*
3955 * check this node pointer against the cache_only and
3956 * min_trans parameters. If it isn't in cache or is too
3957 * old, skip to the next one.
3958 */
Chris Masond3977122009-01-05 21:25:51 -05003959 while (slot < nritems) {
Chris Mason3f157a22008-06-25 16:01:31 -04003960 u64 blockptr;
3961 u64 gen;
3962 struct extent_buffer *tmp;
Chris Masone02119d2008-09-05 16:13:11 -04003963 struct btrfs_disk_key disk_key;
3964
Chris Mason3f157a22008-06-25 16:01:31 -04003965 blockptr = btrfs_node_blockptr(cur, slot);
3966 gen = btrfs_node_ptr_generation(cur, slot);
3967 if (gen < min_trans) {
3968 slot++;
3969 continue;
3970 }
3971 if (!cache_only)
3972 break;
3973
Chris Masone02119d2008-09-05 16:13:11 -04003974 if (max_key) {
3975 btrfs_node_key(cur, &disk_key, slot);
3976 if (comp_keys(&disk_key, max_key) >= 0) {
3977 ret = 1;
3978 goto out;
3979 }
3980 }
3981
Chris Mason3f157a22008-06-25 16:01:31 -04003982 tmp = btrfs_find_tree_block(root, blockptr,
3983 btrfs_level_size(root, level - 1));
3984
3985 if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
3986 free_extent_buffer(tmp);
3987 break;
3988 }
3989 if (tmp)
3990 free_extent_buffer(tmp);
3991 slot++;
3992 }
Chris Masone02119d2008-09-05 16:13:11 -04003993find_next_key:
Chris Mason3f157a22008-06-25 16:01:31 -04003994 /*
3995 * we didn't find a candidate key in this node, walk forward
3996 * and find another one
3997 */
3998 if (slot >= nritems) {
Chris Masone02119d2008-09-05 16:13:11 -04003999 path->slots[level] = slot;
Chris Masonb4ce94d2009-02-04 09:25:08 -05004000 btrfs_set_path_blocking(path);
Chris Masone02119d2008-09-05 16:13:11 -04004001 sret = btrfs_find_next_key(root, path, min_key, level,
Chris Mason3f157a22008-06-25 16:01:31 -04004002 cache_only, min_trans);
Chris Masone02119d2008-09-05 16:13:11 -04004003 if (sret == 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02004004 btrfs_release_path(path);
Chris Mason3f157a22008-06-25 16:01:31 -04004005 goto again;
4006 } else {
4007 goto out;
4008 }
4009 }
4010 /* save our key for returning back */
4011 btrfs_node_key_to_cpu(cur, &found_key, slot);
4012 path->slots[level] = slot;
4013 if (level == path->lowest_level) {
4014 ret = 0;
4015 unlock_up(path, level, 1);
4016 goto out;
4017 }
Chris Masonb4ce94d2009-02-04 09:25:08 -05004018 btrfs_set_path_blocking(path);
Chris Mason3f157a22008-06-25 16:01:31 -04004019 cur = read_node_slot(root, cur, slot);
Tsutomu Itoh97d9a8a2011-03-24 06:33:21 +00004020 BUG_ON(!cur);
Chris Mason3f157a22008-06-25 16:01:31 -04004021
Chris Masonbd681512011-07-16 15:23:14 -04004022 btrfs_tree_read_lock(cur);
Chris Masonb4ce94d2009-02-04 09:25:08 -05004023
Chris Masonbd681512011-07-16 15:23:14 -04004024 path->locks[level - 1] = BTRFS_READ_LOCK;
Chris Mason3f157a22008-06-25 16:01:31 -04004025 path->nodes[level - 1] = cur;
4026 unlock_up(path, level, 1);
Chris Masonbd681512011-07-16 15:23:14 -04004027 btrfs_clear_path_blocking(path, NULL, 0);
Chris Mason3f157a22008-06-25 16:01:31 -04004028 }
4029out:
4030 if (ret == 0)
4031 memcpy(min_key, &found_key, sizeof(found_key));
Chris Masonb4ce94d2009-02-04 09:25:08 -05004032 btrfs_set_path_blocking(path);
Chris Mason3f157a22008-06-25 16:01:31 -04004033 return ret;
4034}
4035
4036/*
4037 * this is similar to btrfs_next_leaf, but does not try to preserve
4038 * and fixup the path. It looks for and returns the next key in the
4039 * tree based on the current path and the cache_only and min_trans
4040 * parameters.
4041 *
4042 * 0 is returned if another key is found, < 0 if there are any errors
4043 * and 1 is returned if there are no higher keys in the tree
4044 *
4045 * path->keep_locks should be set to 1 on the search made before
4046 * calling this function.
4047 */
Chris Masone7a84562008-06-25 16:01:31 -04004048int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
Yan Zheng33c66f42009-07-22 09:59:00 -04004049 struct btrfs_key *key, int level,
Chris Mason3f157a22008-06-25 16:01:31 -04004050 int cache_only, u64 min_trans)
Chris Masone7a84562008-06-25 16:01:31 -04004051{
Chris Masone7a84562008-06-25 16:01:31 -04004052 int slot;
4053 struct extent_buffer *c;
4054
Chris Mason934d3752008-12-08 16:43:10 -05004055 WARN_ON(!path->keep_locks);
Chris Masond3977122009-01-05 21:25:51 -05004056 while (level < BTRFS_MAX_LEVEL) {
Chris Masone7a84562008-06-25 16:01:31 -04004057 if (!path->nodes[level])
4058 return 1;
4059
4060 slot = path->slots[level] + 1;
4061 c = path->nodes[level];
Chris Mason3f157a22008-06-25 16:01:31 -04004062next:
Chris Masone7a84562008-06-25 16:01:31 -04004063 if (slot >= btrfs_header_nritems(c)) {
Yan Zheng33c66f42009-07-22 09:59:00 -04004064 int ret;
4065 int orig_lowest;
4066 struct btrfs_key cur_key;
4067 if (level + 1 >= BTRFS_MAX_LEVEL ||
4068 !path->nodes[level + 1])
Chris Masone7a84562008-06-25 16:01:31 -04004069 return 1;
Yan Zheng33c66f42009-07-22 09:59:00 -04004070
4071 if (path->locks[level + 1]) {
4072 level++;
4073 continue;
4074 }
4075
4076 slot = btrfs_header_nritems(c) - 1;
4077 if (level == 0)
4078 btrfs_item_key_to_cpu(c, &cur_key, slot);
4079 else
4080 btrfs_node_key_to_cpu(c, &cur_key, slot);
4081
4082 orig_lowest = path->lowest_level;
David Sterbab3b4aa72011-04-21 01:20:15 +02004083 btrfs_release_path(path);
Yan Zheng33c66f42009-07-22 09:59:00 -04004084 path->lowest_level = level;
4085 ret = btrfs_search_slot(NULL, root, &cur_key, path,
4086 0, 0);
4087 path->lowest_level = orig_lowest;
4088 if (ret < 0)
4089 return ret;
4090
4091 c = path->nodes[level];
4092 slot = path->slots[level];
4093 if (ret == 0)
4094 slot++;
4095 goto next;
Chris Masone7a84562008-06-25 16:01:31 -04004096 }
Yan Zheng33c66f42009-07-22 09:59:00 -04004097
Chris Masone7a84562008-06-25 16:01:31 -04004098 if (level == 0)
4099 btrfs_item_key_to_cpu(c, key, slot);
Chris Mason3f157a22008-06-25 16:01:31 -04004100 else {
4101 u64 blockptr = btrfs_node_blockptr(c, slot);
4102 u64 gen = btrfs_node_ptr_generation(c, slot);
4103
4104 if (cache_only) {
4105 struct extent_buffer *cur;
4106 cur = btrfs_find_tree_block(root, blockptr,
4107 btrfs_level_size(root, level - 1));
4108 if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
4109 slot++;
4110 if (cur)
4111 free_extent_buffer(cur);
4112 goto next;
4113 }
4114 free_extent_buffer(cur);
4115 }
4116 if (gen < min_trans) {
4117 slot++;
4118 goto next;
4119 }
Chris Masone7a84562008-06-25 16:01:31 -04004120 btrfs_node_key_to_cpu(c, key, slot);
Chris Mason3f157a22008-06-25 16:01:31 -04004121 }
Chris Masone7a84562008-06-25 16:01:31 -04004122 return 0;
4123 }
4124 return 1;
4125}
4126
Chris Mason7bb86312007-12-11 09:25:06 -05004127/*
Chris Mason925baed2008-06-25 16:01:30 -04004128 * search the tree again to find a leaf with greater keys
Chris Mason0f70abe2007-02-28 16:46:22 -05004129 * returns 0 if it found something or 1 if there are no greater leaves.
4130 * returns < 0 on io errors.
Chris Mason97571fd2007-02-24 13:39:08 -05004131 */
Chris Mason234b63a2007-03-13 10:46:10 -04004132int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
Chris Masond97e63b2007-02-20 16:40:44 -05004133{
4134 int slot;
Chris Mason8e73f272009-04-03 10:14:18 -04004135 int level;
Chris Mason5f39d392007-10-15 16:14:19 -04004136 struct extent_buffer *c;
Chris Mason8e73f272009-04-03 10:14:18 -04004137 struct extent_buffer *next;
Chris Mason925baed2008-06-25 16:01:30 -04004138 struct btrfs_key key;
4139 u32 nritems;
4140 int ret;
Chris Mason8e73f272009-04-03 10:14:18 -04004141 int old_spinning = path->leave_spinning;
Chris Masonbd681512011-07-16 15:23:14 -04004142 int next_rw_lock = 0;
Chris Mason925baed2008-06-25 16:01:30 -04004143
4144 nritems = btrfs_header_nritems(path->nodes[0]);
Chris Masond3977122009-01-05 21:25:51 -05004145 if (nritems == 0)
Chris Mason925baed2008-06-25 16:01:30 -04004146 return 1;
Chris Mason925baed2008-06-25 16:01:30 -04004147
Chris Mason8e73f272009-04-03 10:14:18 -04004148 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
4149again:
4150 level = 1;
4151 next = NULL;
Chris Masonbd681512011-07-16 15:23:14 -04004152 next_rw_lock = 0;
David Sterbab3b4aa72011-04-21 01:20:15 +02004153 btrfs_release_path(path);
Chris Mason8e73f272009-04-03 10:14:18 -04004154
Chris Masona2135012008-06-25 16:01:30 -04004155 path->keep_locks = 1;
Chris Mason31533fb2011-07-26 16:01:59 -04004156 path->leave_spinning = 1;
Chris Mason8e73f272009-04-03 10:14:18 -04004157
Chris Mason925baed2008-06-25 16:01:30 -04004158 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4159 path->keep_locks = 0;
4160
4161 if (ret < 0)
4162 return ret;
4163
Chris Masona2135012008-06-25 16:01:30 -04004164 nritems = btrfs_header_nritems(path->nodes[0]);
Chris Mason168fd7d2008-06-25 16:01:30 -04004165 /*
4166 * by releasing the path above we dropped all our locks. A balance
4167 * could have added more items next to the key that used to be
4168 * at the very end of the block. So, check again here and
4169 * advance the path if there are now more items available.
4170 */
Chris Masona2135012008-06-25 16:01:30 -04004171 if (nritems > 0 && path->slots[0] < nritems - 1) {
Yan Zhenge457afe2009-07-22 09:59:00 -04004172 if (ret == 0)
4173 path->slots[0]++;
Chris Mason8e73f272009-04-03 10:14:18 -04004174 ret = 0;
Chris Mason925baed2008-06-25 16:01:30 -04004175 goto done;
4176 }
Chris Masond97e63b2007-02-20 16:40:44 -05004177
Chris Masond3977122009-01-05 21:25:51 -05004178 while (level < BTRFS_MAX_LEVEL) {
Chris Mason8e73f272009-04-03 10:14:18 -04004179 if (!path->nodes[level]) {
4180 ret = 1;
4181 goto done;
4182 }
Chris Mason5f39d392007-10-15 16:14:19 -04004183
Chris Masond97e63b2007-02-20 16:40:44 -05004184 slot = path->slots[level] + 1;
4185 c = path->nodes[level];
Chris Mason5f39d392007-10-15 16:14:19 -04004186 if (slot >= btrfs_header_nritems(c)) {
Chris Masond97e63b2007-02-20 16:40:44 -05004187 level++;
Chris Mason8e73f272009-04-03 10:14:18 -04004188 if (level == BTRFS_MAX_LEVEL) {
4189 ret = 1;
4190 goto done;
4191 }
Chris Masond97e63b2007-02-20 16:40:44 -05004192 continue;
4193 }
Chris Mason5f39d392007-10-15 16:14:19 -04004194
Chris Mason925baed2008-06-25 16:01:30 -04004195 if (next) {
Chris Masonbd681512011-07-16 15:23:14 -04004196 btrfs_tree_unlock_rw(next, next_rw_lock);
Chris Mason5f39d392007-10-15 16:14:19 -04004197 free_extent_buffer(next);
Chris Mason925baed2008-06-25 16:01:30 -04004198 }
Chris Mason5f39d392007-10-15 16:14:19 -04004199
Chris Mason8e73f272009-04-03 10:14:18 -04004200 next = c;
Chris Masonbd681512011-07-16 15:23:14 -04004201 next_rw_lock = path->locks[level];
Chris Mason8e73f272009-04-03 10:14:18 -04004202 ret = read_block_for_search(NULL, root, path, &next, level,
4203 slot, &key);
4204 if (ret == -EAGAIN)
4205 goto again;
Chris Mason5f39d392007-10-15 16:14:19 -04004206
Chris Mason76a05b32009-05-14 13:24:30 -04004207 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02004208 btrfs_release_path(path);
Chris Mason76a05b32009-05-14 13:24:30 -04004209 goto done;
4210 }
4211
Chris Mason5cd57b22008-06-25 16:01:30 -04004212 if (!path->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04004213 ret = btrfs_try_tree_read_lock(next);
Chris Mason8e73f272009-04-03 10:14:18 -04004214 if (!ret) {
4215 btrfs_set_path_blocking(path);
Chris Masonbd681512011-07-16 15:23:14 -04004216 btrfs_tree_read_lock(next);
Chris Mason31533fb2011-07-26 16:01:59 -04004217 btrfs_clear_path_blocking(path, next,
Chris Masonbd681512011-07-16 15:23:14 -04004218 BTRFS_READ_LOCK);
Chris Mason8e73f272009-04-03 10:14:18 -04004219 }
Chris Mason31533fb2011-07-26 16:01:59 -04004220 next_rw_lock = BTRFS_READ_LOCK;
Chris Mason5cd57b22008-06-25 16:01:30 -04004221 }
Chris Masond97e63b2007-02-20 16:40:44 -05004222 break;
4223 }
4224 path->slots[level] = slot;
Chris Masond3977122009-01-05 21:25:51 -05004225 while (1) {
Chris Masond97e63b2007-02-20 16:40:44 -05004226 level--;
4227 c = path->nodes[level];
Chris Mason925baed2008-06-25 16:01:30 -04004228 if (path->locks[level])
Chris Masonbd681512011-07-16 15:23:14 -04004229 btrfs_tree_unlock_rw(c, path->locks[level]);
Chris Mason8e73f272009-04-03 10:14:18 -04004230
Chris Mason5f39d392007-10-15 16:14:19 -04004231 free_extent_buffer(c);
Chris Masond97e63b2007-02-20 16:40:44 -05004232 path->nodes[level] = next;
4233 path->slots[level] = 0;
Chris Masona74a4b92008-06-25 16:01:31 -04004234 if (!path->skip_locking)
Chris Masonbd681512011-07-16 15:23:14 -04004235 path->locks[level] = next_rw_lock;
Chris Masond97e63b2007-02-20 16:40:44 -05004236 if (!level)
4237 break;
Chris Masonb4ce94d2009-02-04 09:25:08 -05004238
Chris Mason8e73f272009-04-03 10:14:18 -04004239 ret = read_block_for_search(NULL, root, path, &next, level,
4240 0, &key);
4241 if (ret == -EAGAIN)
4242 goto again;
4243
Chris Mason76a05b32009-05-14 13:24:30 -04004244 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02004245 btrfs_release_path(path);
Chris Mason76a05b32009-05-14 13:24:30 -04004246 goto done;
4247 }
4248
Chris Mason5cd57b22008-06-25 16:01:30 -04004249 if (!path->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04004250 ret = btrfs_try_tree_read_lock(next);
Chris Mason8e73f272009-04-03 10:14:18 -04004251 if (!ret) {
4252 btrfs_set_path_blocking(path);
Chris Masonbd681512011-07-16 15:23:14 -04004253 btrfs_tree_read_lock(next);
Chris Mason31533fb2011-07-26 16:01:59 -04004254 btrfs_clear_path_blocking(path, next,
Chris Masonbd681512011-07-16 15:23:14 -04004255 BTRFS_READ_LOCK);
Chris Mason8e73f272009-04-03 10:14:18 -04004256 }
Chris Mason31533fb2011-07-26 16:01:59 -04004257 next_rw_lock = BTRFS_READ_LOCK;
Chris Mason5cd57b22008-06-25 16:01:30 -04004258 }
Chris Masond97e63b2007-02-20 16:40:44 -05004259 }
Chris Mason8e73f272009-04-03 10:14:18 -04004260 ret = 0;
Chris Mason925baed2008-06-25 16:01:30 -04004261done:
4262 unlock_up(path, 0, 1);
Chris Mason8e73f272009-04-03 10:14:18 -04004263 path->leave_spinning = old_spinning;
4264 if (!old_spinning)
4265 btrfs_set_path_blocking(path);
4266
4267 return ret;
Chris Masond97e63b2007-02-20 16:40:44 -05004268}
Chris Mason0b86a832008-03-24 15:01:56 -04004269
Chris Mason3f157a22008-06-25 16:01:31 -04004270/*
4271 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
4272 * searching until it gets past min_objectid or finds an item of 'type'
4273 *
4274 * returns 0 if something is found, 1 if nothing was found and < 0 on error
4275 */
Chris Mason0b86a832008-03-24 15:01:56 -04004276int btrfs_previous_item(struct btrfs_root *root,
4277 struct btrfs_path *path, u64 min_objectid,
4278 int type)
4279{
4280 struct btrfs_key found_key;
4281 struct extent_buffer *leaf;
Chris Masone02119d2008-09-05 16:13:11 -04004282 u32 nritems;
Chris Mason0b86a832008-03-24 15:01:56 -04004283 int ret;
4284
Chris Masond3977122009-01-05 21:25:51 -05004285 while (1) {
Chris Mason0b86a832008-03-24 15:01:56 -04004286 if (path->slots[0] == 0) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05004287 btrfs_set_path_blocking(path);
Chris Mason0b86a832008-03-24 15:01:56 -04004288 ret = btrfs_prev_leaf(root, path);
4289 if (ret != 0)
4290 return ret;
4291 } else {
4292 path->slots[0]--;
4293 }
4294 leaf = path->nodes[0];
Chris Masone02119d2008-09-05 16:13:11 -04004295 nritems = btrfs_header_nritems(leaf);
4296 if (nritems == 0)
4297 return 1;
4298 if (path->slots[0] == nritems)
4299 path->slots[0]--;
4300
Chris Mason0b86a832008-03-24 15:01:56 -04004301 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
Chris Masone02119d2008-09-05 16:13:11 -04004302 if (found_key.objectid < min_objectid)
4303 break;
Yan Zheng0a4eefb2009-07-24 11:06:53 -04004304 if (found_key.type == type)
4305 return 0;
Chris Masone02119d2008-09-05 16:13:11 -04004306 if (found_key.objectid == min_objectid &&
4307 found_key.type < type)
4308 break;
Chris Mason0b86a832008-03-24 15:01:56 -04004309 }
4310 return 1;
4311}