]>
Commit | Line | Data |
---|---|---|
052932c9 AM |
1 | diff -urN linux-2.4.22.org/fs/buffer.c linux-2.4.22/fs/buffer.c |
2 | --- linux-2.4.22.org/fs/buffer.c 2003-11-21 15:08:24.000000000 +0100 | |
3 | +++ linux-2.4.22/fs/buffer.c 2003-11-21 15:14:23.000000000 +0100 | |
4 | @@ -659,6 +659,20 @@ | |
e57e653a JR |
5 | spin_unlock(&lru_list_lock); |
6 | } | |
7 | ||
8 | +void buffer_insert_list_journal_head(struct buffer_head *bh, | |
9 | + struct list_head *list, | |
10 | + void *journal_head) | |
11 | +{ | |
12 | + spin_lock(&lru_list_lock); | |
13 | + if (buffer_attached(bh)) | |
14 | + list_del(&bh->b_inode_buffers); | |
15 | + set_buffer_attached(bh); | |
16 | + list_add(&bh->b_inode_buffers, list); | |
17 | + bh->b_journal_head = journal_head; | |
18 | + spin_unlock(&lru_list_lock); | |
19 | +} | |
20 | +EXPORT_SYMBOL(buffer_insert_list_journal_head); | |
21 | + | |
22 | /* | |
23 | * The caller must have the lru_list lock before calling the | |
24 | * remove_inode_queue functions. | |
052932c9 | 25 | @@ -1370,7 +1384,7 @@ |
e57e653a JR |
26 | /* |
27 | * Called when truncating a buffer on a page completely. | |
28 | */ | |
29 | -static void discard_buffer(struct buffer_head * bh) | |
30 | +void discard_buffer(struct buffer_head * bh) | |
31 | { | |
32 | if (buffer_mapped(bh) || buffer_delay(bh)) { | |
33 | mark_buffer_clean(bh); | |
052932c9 AM |
34 | diff -urN linux-2.4.22.org/fs/inode.c linux-2.4.22/fs/inode.c |
35 | --- linux-2.4.22.org/fs/inode.c 2003-11-21 15:08:24.000000000 +0100 | |
36 | +++ linux-2.4.22/fs/inode.c 2003-11-21 15:14:23.000000000 +0100 | |
37 | @@ -476,7 +476,7 @@ | |
e57e653a JR |
38 | } |
39 | } | |
40 | ||
41 | -static void try_to_sync_unused_inodes(void * arg) | |
42 | +static void try_to_sync_unused_inodes(void) | |
43 | { | |
44 | struct super_block * sb; | |
45 | int nr_inodes = inodes_stat.nr_unused; | |
052932c9 | 46 | @@ -495,7 +495,8 @@ |
e57e653a JR |
47 | spin_unlock(&inode_lock); |
48 | } | |
49 | ||
50 | -static struct tq_struct unused_inodes_flush_task; | |
51 | +static DECLARE_WAIT_QUEUE_HEAD(kinoded_wait) ; | |
52 | +static atomic_t kinoded_goal = ATOMIC_INIT(0) ; | |
53 | ||
54 | /** | |
55 | * write_inode_now - write an inode to disk | |
052932c9 | 56 | @@ -758,7 +759,7 @@ |
e57e653a JR |
57 | !inode_has_buffers(inode)) |
58 | #define INODE(entry) (list_entry(entry, struct inode, i_list)) | |
59 | ||
60 | -void prune_icache(int goal) | |
61 | +static void _prune_icache(int goal) | |
62 | { | |
63 | LIST_HEAD(list); | |
64 | struct list_head *entry, *freeable = &list; | |
052932c9 | 65 | @@ -792,35 +793,29 @@ |
e57e653a JR |
66 | spin_unlock(&inode_lock); |
67 | ||
68 | dispose_list(freeable); | |
69 | + kmem_cache_shrink(inode_cachep); | |
70 | ||
71 | /* | |
72 | - * If we didn't freed enough clean inodes schedule | |
73 | - * a sync of the dirty inodes, we cannot do it | |
74 | - * from here or we're either synchronously dogslow | |
75 | - * or we deadlock with oom. | |
76 | + * If we didn't freed enough clean inodes | |
77 | + * start a sync now | |
78 | */ | |
79 | if (goal) | |
80 | - schedule_task(&unused_inodes_flush_task); | |
81 | + try_to_sync_unused_inodes(); | |
82 | +} | |
83 | + | |
84 | +void prune_icache(int goal) { | |
85 | + atomic_add(goal, &kinoded_goal); | |
86 | + if (atomic_read(&kinoded_goal) > 16) { | |
87 | + wake_up_interruptible(&kinoded_wait); | |
88 | + } | |
89 | } | |
90 | ||
91 | int shrink_icache_memory(int priority, int gfp_mask) | |
92 | { | |
93 | int count = 0; | |
94 | - | |
95 | - /* | |
96 | - * Nasty deadlock avoidance.. | |
97 | - * | |
98 | - * We may hold various FS locks, and we don't | |
99 | - * want to recurse into the FS that called us | |
100 | - * in clear_inode() and friends.. | |
101 | - */ | |
102 | - if (!(gfp_mask & __GFP_FS)) | |
103 | - return 0; | |
104 | - | |
105 | count = inodes_stat.nr_unused / priority; | |
106 | - | |
107 | prune_icache(count); | |
108 | - return kmem_cache_shrink(inode_cachep); | |
109 | + return 0; | |
110 | } | |
111 | ||
112 | /* | |
052932c9 | 113 | @@ -1198,6 +1193,34 @@ |
e57e653a JR |
114 | return res; |
115 | } | |
116 | ||
117 | +int kinoded(void *startup) { | |
118 | + | |
119 | + struct task_struct *tsk = current; | |
120 | + int goal ; | |
121 | + | |
122 | + daemonize(); | |
123 | + strcpy(tsk->comm, "kinoded"); | |
124 | + | |
125 | + /* avoid getting signals */ | |
126 | + spin_lock_irq(&tsk->sigmask_lock); | |
127 | + flush_signals(tsk); | |
128 | + sigfillset(&tsk->blocked); | |
129 | + recalc_sigpending(tsk); | |
130 | + spin_unlock_irq(&tsk->sigmask_lock); | |
131 | + | |
132 | + printk("kinoded started\n") ; | |
133 | + complete((struct completion *)startup); | |
134 | + while(1) { | |
135 | + wait_event_interruptible(kinoded_wait, | |
136 | + atomic_read(&kinoded_goal)); | |
137 | + while((goal = atomic_read(&kinoded_goal))) { | |
138 | + _prune_icache(goal); | |
139 | + atomic_sub(goal, &kinoded_goal); | |
140 | + cond_resched(); | |
141 | + } | |
142 | + } | |
143 | +} | |
144 | + | |
145 | /* | |
146 | * Initialize the hash tables. | |
147 | */ | |
052932c9 | 148 | @@ -1249,8 +1272,17 @@ |
e57e653a JR |
149 | NULL); |
150 | if (!inode_cachep) | |
151 | panic("cannot create inode slab cache"); | |
152 | +} | |
153 | ||
154 | - unused_inodes_flush_task.routine = try_to_sync_unused_inodes; | |
155 | +/* we need to start a thread, and inode_init happens too early for that | |
156 | +** to work. So, add a second init func through module_init | |
157 | +*/ | |
158 | +static int __init inode_mod_init(void) | |
159 | +{ | |
160 | + static struct completion startup __initdata = COMPLETION_INITIALIZER(startup); | |
161 | + kernel_thread(kinoded, &startup, CLONE_FS | CLONE_FILES | CLONE_SIGNAL); | |
162 | + wait_for_completion(&startup); | |
163 | + return 0; | |
164 | } | |
165 | ||
166 | /** | |
052932c9 | 167 | @@ -1344,3 +1376,5 @@ |
e57e653a JR |
168 | } |
169 | ||
170 | #endif | |
171 | + | |
172 | +module_init(inode_mod_init) ; | |
052932c9 AM |
173 | diff -urN linux-2.4.22.org/fs/reiserfs/bitmap.c linux-2.4.22/fs/reiserfs/bitmap.c |
174 | --- linux-2.4.22.org/fs/reiserfs/bitmap.c 2003-11-21 15:08:29.000000000 +0100 | |
175 | +++ linux-2.4.22/fs/reiserfs/bitmap.c 2003-11-21 15:14:23.000000000 +0100 | |
e57e653a JR |
176 | @@ -10,6 +10,7 @@ |
177 | #include <linux/errno.h> | |
178 | #include <linux/locks.h> | |
179 | #include <linux/kernel.h> | |
180 | +#include <linux/quotaops.h> | |
181 | ||
182 | #include <linux/reiserfs_fs.h> | |
183 | #include <linux/reiserfs_fs_sb.h> | |
184 | @@ -287,7 +288,8 @@ | |
185 | } | |
186 | ||
187 | static void _reiserfs_free_block (struct reiserfs_transaction_handle *th, | |
188 | - b_blocknr_t block) | |
189 | + struct inode *inode, b_blocknr_t block, | |
190 | + int for_unformatted) | |
191 | { | |
192 | struct super_block * s = th->t_super; | |
193 | struct reiserfs_super_block * rs; | |
194 | @@ -296,7 +298,6 @@ | |
195 | int nr, offset; | |
196 | ||
197 | PROC_INFO_INC( s, free_block ); | |
198 | - | |
199 | rs = SB_DISK_SUPER_BLOCK (s); | |
200 | sbh = SB_BUFFER_WITH_SB (s); | |
201 | apbi = SB_AP_BITMAP(s); | |
202 | @@ -309,7 +310,6 @@ | |
203 | block, bdevname(s->s_dev)); | |
204 | return; | |
205 | } | |
206 | - | |
207 | reiserfs_prepare_for_journal(s, apbi[nr].bh, 1 ) ; | |
208 | ||
209 | /* clear bit for the given block in bit map */ | |
210 | @@ -329,39 +329,55 @@ | |
211 | set_sb_free_blocks( rs, sb_free_blocks(rs) + 1 ); | |
212 | ||
213 | journal_mark_dirty (th, s, sbh); | |
214 | + if (for_unformatted) { | |
215 | +#ifdef REISERQUOTA_DEBUG | |
216 | + printk(KERN_DEBUG "reiserquota: freeing block id=%u\n", inode->i_uid); | |
217 | +#endif | |
218 | + DQUOT_FREE_BLOCK_NODIRTY(inode, 1); | |
219 | + } | |
220 | + | |
221 | } | |
222 | ||
223 | void reiserfs_free_block (struct reiserfs_transaction_handle *th, | |
224 | - unsigned long block) { | |
225 | + struct inode *inode, unsigned long block, | |
226 | + int for_unformatted) | |
227 | +{ | |
228 | struct super_block * s = th->t_super; | |
229 | ||
230 | RFALSE(!s, "vs-4061: trying to free block on nonexistent device"); | |
231 | RFALSE(is_reusable (s, block, 1) == 0, "vs-4071: can not free such block"); | |
232 | /* mark it before we clear it, just in case */ | |
233 | journal_mark_freed(th, s, block) ; | |
234 | - _reiserfs_free_block(th, block) ; | |
235 | + _reiserfs_free_block(th, inode, block, for_unformatted) ; | |
236 | } | |
237 | ||
238 | /* preallocated blocks don't need to be run through journal_mark_freed */ | |
239 | void reiserfs_free_prealloc_block (struct reiserfs_transaction_handle *th, | |
240 | - unsigned long block) { | |
241 | + struct inode *inode, | |
242 | + unsigned long block) | |
243 | +{ | |
244 | RFALSE(!th->t_super, "vs-4060: trying to free block on nonexistent device"); | |
245 | RFALSE(is_reusable (th->t_super, block, 1) == 0, "vs-4070: can not free such block"); | |
246 | - _reiserfs_free_block(th, block) ; | |
247 | + _reiserfs_free_block(th, inode, block, 1) ; | |
248 | } | |
249 | ||
250 | static void __discard_prealloc (struct reiserfs_transaction_handle * th, | |
251 | struct inode * inode) | |
252 | { | |
253 | unsigned long save = inode->u.reiserfs_i.i_prealloc_block ; | |
254 | + int dirty=0; | |
255 | #ifdef CONFIG_REISERFS_CHECK | |
256 | if (inode->u.reiserfs_i.i_prealloc_count < 0) | |
257 | reiserfs_warning(th->t_super, "zam-4001:%s: inode has negative prealloc blocks count.\n", __FUNCTION__ ); | |
258 | #endif | |
259 | while (inode->u.reiserfs_i.i_prealloc_count > 0) { | |
260 | - reiserfs_free_prealloc_block(th,inode->u.reiserfs_i.i_prealloc_block); | |
261 | + reiserfs_free_prealloc_block(th, inode, inode->u.reiserfs_i.i_prealloc_block); | |
262 | inode->u.reiserfs_i.i_prealloc_block++; | |
263 | inode->u.reiserfs_i.i_prealloc_count --; | |
264 | + dirty = 1 ; | |
265 | + } | |
266 | + if (dirty) { | |
267 | + reiserfs_update_sd(th, inode) ; | |
268 | } | |
269 | inode->u.reiserfs_i.i_prealloc_block = save ; | |
270 | list_del (&(inode->u.reiserfs_i.i_prealloc_list)); | |
271 | @@ -599,7 +615,6 @@ | |
272 | if (hint->formatted_node || hint->inode == NULL) { | |
273 | return 0; | |
274 | } | |
275 | - | |
276 | hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id); | |
277 | border = hint->beg + (unsigned long) keyed_hash(((char *) (&hash_in)), 4) % (hint->end - hint->beg - 1); | |
278 | if (border > hint->search_start) | |
279 | @@ -776,6 +791,24 @@ | |
280 | int nr_allocated = 0; | |
281 | ||
282 | determine_prealloc_size(hint); | |
283 | + if (!hint->formatted_node) { | |
284 | + int quota_ret; | |
285 | +#ifdef REISERQUOTA_DEBUG | |
286 | + printk(KERN_DEBUG "reiserquota: allocating %d blocks id=%u\n", amount_needed, hint->inode->i_uid); | |
287 | +#endif | |
288 | + quota_ret = DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed); | |
289 | + if (quota_ret) /* Quota exceeded? */ | |
290 | + return QUOTA_EXCEEDED; | |
291 | + if (hint->preallocate && hint->prealloc_size ) { | |
292 | +#ifdef REISERQUOTA_DEBUG | |
293 | + printk(KERN_DEBUG "reiserquota: allocating (prealloc) %d blocks id=%u\n", hint->prealloc_size, hint->inode->i_uid); | |
294 | +#endif | |
295 | + quota_ret = DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, hint->prealloc_size); | |
296 | + if (quota_ret) | |
297 | + hint->preallocate=hint->prealloc_size=0; | |
298 | + } | |
299 | + } | |
300 | + | |
301 | while((nr_allocated | |
302 | += allocate_without_wrapping_disk(hint, new_blocknrs + nr_allocated, start, finish, | |
303 | amount_needed - nr_allocated, hint->prealloc_size)) | |
304 | @@ -783,8 +816,14 @@ | |
305 | ||
306 | /* not all blocks were successfully allocated yet*/ | |
307 | if (second_pass) { /* it was a second pass; we must free all blocks */ | |
308 | + if (!hint->formatted_node) { | |
309 | +#ifdef REISERQUOTA_DEBUG | |
310 | + printk(KERN_DEBUG "reiserquota: freeing (nospace) %d blocks id=%u\n", amount_needed + hint->prealloc_size - nr_allocated, hint->inode->i_uid); | |
311 | +#endif | |
312 | + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */ | |
313 | + } | |
314 | while (nr_allocated --) | |
315 | - reiserfs_free_block(hint->th, new_blocknrs[nr_allocated]); | |
316 | + reiserfs_free_block(hint->th, hint->inode, new_blocknrs[nr_allocated], !hint->formatted_node); | |
317 | ||
318 | return NO_DISK_SPACE; | |
319 | } else { /* refine search parameters for next pass */ | |
320 | @@ -794,6 +833,13 @@ | |
321 | continue; | |
322 | } | |
323 | } | |
324 | + if ( !hint->formatted_node && amount_needed + hint->prealloc_size > nr_allocated + INODE_INFO(hint->inode)->i_prealloc_count) { | |
325 | + /* Some of preallocation blocks were not allocated */ | |
326 | +#ifdef REISERQUOTA_DEBUG | |
327 | + printk(KERN_DEBUG "reiserquota: freeing (failed prealloc) %d blocks id=%u\n", amount_needed + hint->prealloc_size - nr_allocated - INODE_INFO(hint->inode)->i_prealloc_count, hint->inode->i_uid); | |
328 | +#endif | |
329 | + DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated - INODE_INFO(hint->inode)->i_prealloc_count); | |
330 | + } | |
331 | return CARRY_ON; | |
332 | } | |
333 | ||
334 | @@ -862,7 +908,7 @@ | |
335 | ||
336 | if (ret != CARRY_ON) { | |
337 | while (amount_needed ++ < initial_amount_needed) { | |
338 | - reiserfs_free_block(hint->th, *(--new_blocknrs)); | |
339 | + reiserfs_free_block(hint->th, hint->inode, *(--new_blocknrs), 1); | |
340 | } | |
341 | } | |
342 | return ret; | |
052932c9 AM |
343 | diff -urN linux-2.4.22.org/fs/reiserfs/do_balan.c linux-2.4.22/fs/reiserfs/do_balan.c |
344 | --- linux-2.4.22.org/fs/reiserfs/do_balan.c 2003-11-21 15:08:29.000000000 +0100 | |
345 | +++ linux-2.4.22/fs/reiserfs/do_balan.c 2003-11-21 15:14:23.000000000 +0100 | |
e57e653a JR |
346 | @@ -33,16 +33,8 @@ |
347 | inline void do_balance_mark_leaf_dirty (struct tree_balance * tb, | |
348 | struct buffer_head * bh, int flag) | |
349 | { | |
350 | - if (reiserfs_dont_log(tb->tb_sb)) { | |
351 | - if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { | |
352 | - __mark_buffer_dirty(bh) ; | |
353 | - tb->need_balance_dirty = 1; | |
354 | - } | |
355 | - } else { | |
356 | - int windex = push_journal_writer("do_balance") ; | |
357 | - journal_mark_dirty(tb->transaction_handle, tb->transaction_handle->t_super, bh) ; | |
358 | - pop_journal_writer(windex) ; | |
359 | - } | |
360 | + journal_mark_dirty(tb->transaction_handle, | |
361 | + tb->transaction_handle->t_super, bh) ; | |
362 | } | |
363 | ||
364 | #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty | |
365 | @@ -1247,7 +1239,7 @@ | |
366 | if (buffer_dirty (tb->thrown[i])) | |
367 | reiserfs_warning (tb->tb_sb, "free_thrown deals with dirty buffer %ld\n", blocknr); | |
368 | brelse(tb->thrown[i]) ; /* incremented in store_thrown */ | |
369 | - reiserfs_free_block (tb->transaction_handle, blocknr); | |
370 | + reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0); | |
371 | } | |
372 | } | |
373 | } | |
374 | @@ -1259,9 +1251,11 @@ | |
375 | set_blkh_level( blkh, FREE_LEVEL ); | |
376 | set_blkh_nr_item( blkh, 0 ); | |
377 | ||
378 | - mark_buffer_clean (bh); | |
379 | + if (buffer_dirty(bh)) | |
380 | + BUG(); | |
381 | + // mark_buffer_clean (bh); | |
382 | /* reiserfs_free_block is no longer schedule safe | |
383 | - reiserfs_free_block (tb->transaction_handle, tb->tb_sb, bh->b_blocknr); | |
384 | + reiserfs_free_block (tb->transaction_handle, NULL, tb->tb_sb, bh->b_blocknr, 0); | |
385 | */ | |
386 | ||
387 | store_thrown (tb, bh); | |
388 | @@ -1575,6 +1569,7 @@ | |
389 | tb->tb_mode = flag; | |
390 | tb->need_balance_dirty = 0; | |
391 | ||
392 | + reiserfs_check_lock_depth("do balance"); | |
393 | if (FILESYSTEM_CHANGED_TB(tb)) { | |
394 | reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ; | |
395 | } | |
396 | @@ -1605,5 +1600,6 @@ | |
397 | ||
398 | ||
399 | do_balance_completed (tb); | |
400 | + reiserfs_check_lock_depth("do balance2"); | |
401 | ||
402 | } | |
052932c9 AM |
403 | diff -urN linux-2.4.22.org/fs/reiserfs/file.c linux-2.4.22/fs/reiserfs/file.c |
404 | --- linux-2.4.22.org/fs/reiserfs/file.c 2003-11-21 15:08:29.000000000 +0100 | |
405 | +++ linux-2.4.22/fs/reiserfs/file.c 2003-11-21 15:14:23.000000000 +0100 | |
e57e653a JR |
406 | @@ -6,6 +6,7 @@ |
407 | #include <linux/sched.h> | |
408 | #include <linux/reiserfs_fs.h> | |
409 | #include <linux/smp_lock.h> | |
410 | +#include <linux/quotaops.h> | |
411 | ||
412 | /* | |
413 | ** We pack the tails of files on file close, not at the time they are written. | |
414 | @@ -42,7 +43,6 @@ | |
415 | lock_kernel() ; | |
416 | down (&inode->i_sem); | |
417 | journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3) ; | |
418 | - reiserfs_update_inode_transaction(inode) ; | |
419 | ||
420 | #ifdef REISERFS_PREALLOCATE | |
421 | reiserfs_discard_prealloc (&th, inode); | |
422 | @@ -93,7 +93,9 @@ | |
423 | static int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { | |
424 | struct inode *inode = dentry->d_inode ; | |
425 | int error ; | |
426 | - if (attr->ia_valid & ATTR_SIZE) { | |
427 | + unsigned int ia_valid = attr->ia_valid ; | |
428 | + | |
429 | + if (ia_valid & ATTR_SIZE) { | |
430 | /* version 2 items will be caught by the s_maxbytes check | |
431 | ** done for us in vmtruncate | |
432 | */ | |
433 | @@ -101,8 +103,17 @@ | |
434 | attr->ia_size > MAX_NON_LFS) | |
435 | return -EFBIG ; | |
436 | ||
437 | + /* During a truncate, we have to make sure the new i_size is in | |
438 | + ** the transaction before we start dropping updates to data logged | |
439 | + ** or ordered write data pages. | |
440 | + */ | |
441 | + if (attr->ia_size < inode->i_size && reiserfs_file_data_log(inode)) { | |
442 | + struct reiserfs_transaction_handle th ; | |
443 | + journal_begin(&th, inode->i_sb, 1) ; | |
444 | + reiserfs_update_sd_size(&th, inode, attr->ia_size) ; | |
445 | + journal_end(&th, inode->i_sb, 1) ; | |
446 | /* fill in hole pointers in the expanding truncate case. */ | |
447 | - if (attr->ia_size > inode->i_size) { | |
448 | + } else if (attr->ia_size > inode->i_size) { | |
449 | error = generic_cont_expand(inode, attr->ia_size) ; | |
450 | if (inode->u.reiserfs_i.i_prealloc_count > 0) { | |
451 | struct reiserfs_transaction_handle th ; | |
452 | @@ -123,15 +134,35 @@ | |
453 | return -EINVAL; | |
454 | ||
455 | error = inode_change_ok(inode, attr) ; | |
456 | - if (!error) | |
457 | - inode_setattr(inode, attr) ; | |
458 | + if (!error) { | |
459 | + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | |
460 | + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) | |
461 | + error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; | |
462 | ||
463 | + if (!error) | |
464 | + error = inode_setattr(inode, attr) ; | |
465 | + } | |
466 | return error ; | |
467 | } | |
468 | ||
469 | +static ssize_t | |
470 | +reiserfs_file_write(struct file *f, const char *b, size_t count, loff_t *ppos) | |
471 | +{ | |
472 | + ssize_t ret; | |
473 | + struct inode *inode = f->f_dentry->d_inode; | |
474 | + | |
475 | + ret = generic_file_write(f, b, count, ppos); | |
476 | + if (ret >= 0 && f->f_flags & O_SYNC) { | |
477 | + lock_kernel(); | |
478 | + reiserfs_commit_for_inode(inode); | |
479 | + unlock_kernel(); | |
480 | + } | |
481 | + return ret; | |
482 | +} | |
483 | + | |
484 | struct file_operations reiserfs_file_operations = { | |
485 | read: generic_file_read, | |
486 | - write: generic_file_write, | |
487 | + write: reiserfs_file_write, | |
488 | ioctl: reiserfs_ioctl, | |
489 | mmap: generic_file_mmap, | |
490 | release: reiserfs_file_release, | |
052932c9 AM |
491 | diff -urN linux-2.4.22.org/fs/reiserfs/fix_node.c linux-2.4.22/fs/reiserfs/fix_node.c |
492 | --- linux-2.4.22.org/fs/reiserfs/fix_node.c 2003-11-21 15:08:29.000000000 +0100 | |
493 | +++ linux-2.4.22/fs/reiserfs/fix_node.c 2003-11-21 15:14:23.000000000 +0100 | |
e57e653a JR |
494 | @@ -795,8 +795,9 @@ |
495 | else /* If we have enough already then there is nothing to do. */ | |
496 | return CARRY_ON; | |
497 | ||
498 | - if ( reiserfs_new_form_blocknrs (p_s_tb, a_n_blocknrs, | |
499 | - n_amount_needed) == NO_DISK_SPACE ) | |
500 | + /* No need to check quota - is not allocated for blocks used for formatted nodes */ | |
501 | + if (reiserfs_new_form_blocknrs (p_s_tb, a_n_blocknrs, | |
502 | + n_amount_needed) == NO_DISK_SPACE) | |
503 | return NO_DISK_SPACE; | |
504 | ||
505 | /* for each blocknumber we just got, get a buffer and stick it on FEB */ | |
506 | @@ -2121,7 +2122,8 @@ | |
507 | ||
508 | static void clear_all_dirty_bits(struct super_block *s, | |
509 | struct buffer_head *bh) { | |
510 | - reiserfs_prepare_for_journal(s, bh, 0) ; | |
511 | + // reiserfs_prepare_for_journal(s, bh, 0) ; | |
512 | + set_bit(BH_JPrepared, &bh->b_state) ; | |
513 | } | |
514 | ||
515 | static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb) | |
516 | @@ -2518,7 +2520,7 @@ | |
517 | /* de-allocated block which was not used by balancing and | |
518 | bforget about buffer for it */ | |
519 | brelse (tb->FEB[i]); | |
520 | - reiserfs_free_block (tb->transaction_handle, blocknr); | |
521 | + reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0); | |
522 | } | |
523 | if (tb->used[i]) { | |
524 | /* release used as new nodes including a new root */ | |
052932c9 AM |
525 | diff -urN linux-2.4.22.org/fs/reiserfs/ibalance.c linux-2.4.22/fs/reiserfs/ibalance.c |
526 | --- linux-2.4.22.org/fs/reiserfs/ibalance.c 2003-11-21 15:08:29.000000000 +0100 | |
527 | +++ linux-2.4.22/fs/reiserfs/ibalance.c 2003-11-21 15:14:23.000000000 +0100 | |
e57e653a JR |
528 | @@ -632,7 +632,6 @@ |
529 | /* use check_internal if new root is an internal node */ | |
530 | check_internal (new_root); | |
531 | /*&&&&&&&&&&&&&&&&&&&&&&*/ | |
532 | - tb->tb_sb->s_dirt = 1; | |
533 | ||
534 | /* do what is needed for buffer thrown from tree */ | |
535 | reiserfs_invalidate_buffer(tb, tbSh); | |
536 | @@ -950,7 +949,6 @@ | |
537 | PUT_SB_ROOT_BLOCK( tb->tb_sb, tbSh->b_blocknr ); | |
538 | PUT_SB_TREE_HEIGHT( tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1 ); | |
539 | do_balance_mark_sb_dirty (tb, tb->tb_sb->u.reiserfs_sb.s_sbh, 1); | |
540 | - tb->tb_sb->s_dirt = 1; | |
541 | } | |
542 | ||
543 | if ( tb->blknum[h] == 2 ) { | |
052932c9 AM |
544 | diff -urN linux-2.4.22.org/fs/reiserfs/inode.c linux-2.4.22/fs/reiserfs/inode.c |
545 | --- linux-2.4.22.org/fs/reiserfs/inode.c 2003-11-21 15:08:29.000000000 +0100 | |
546 | +++ linux-2.4.22/fs/reiserfs/inode.c 2003-11-21 15:14:23.000000000 +0100 | |
e57e653a JR |
547 | @@ -4,9 +4,11 @@ |
548 | ||
549 | #include <linux/config.h> | |
550 | #include <linux/sched.h> | |
551 | +#include <linux/fs.h> | |
552 | #include <linux/reiserfs_fs.h> | |
553 | #include <linux/locks.h> | |
554 | #include <linux/smp_lock.h> | |
555 | +#include <linux/quotaops.h> | |
556 | #include <asm/uaccess.h> | |
557 | #include <asm/unaligned.h> | |
558 | ||
559 | @@ -17,6 +19,8 @@ | |
560 | #define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ | |
561 | #define GET_BLOCK_NO_ISEM 8 /* i_sem is not held, don't preallocate */ | |
562 | ||
563 | +static int reiserfs_commit_write(struct file *, struct page *, | |
564 | + unsigned from, unsigned to) ; | |
565 | static int reiserfs_get_block (struct inode * inode, long block, | |
566 | struct buffer_head * bh_result, int create); | |
567 | ||
568 | @@ -33,6 +37,7 @@ | |
569 | ||
570 | lock_kernel() ; | |
571 | ||
572 | + DQUOT_FREE_INODE(inode); | |
573 | /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ | |
574 | if (INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ | |
575 | down (&inode->i_sem); | |
576 | @@ -106,9 +111,13 @@ | |
577 | } | |
578 | ||
579 | static void add_to_flushlist(struct inode *inode, struct buffer_head *bh) { | |
580 | - struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ; | |
581 | + struct reiserfs_journal_list *jl = SB_JOURNAL(inode->i_sb)->j_current_jl; | |
582 | + buffer_insert_list_journal_head(bh, &jl->j_ordered_bh_list, jl); | |
583 | +} | |
584 | ||
585 | - buffer_insert_list(bh, &j->j_dirty_buffers) ; | |
586 | +static void add_to_tail_list(struct inode *inode, struct buffer_head *bh) { | |
587 | + struct reiserfs_journal_list *jl = SB_JOURNAL(inode->i_sb)->j_current_jl; | |
588 | + buffer_insert_list_journal_head(bh, &jl->j_tail_bh_list, jl); | |
589 | } | |
590 | ||
591 | // | |
592 | @@ -201,15 +210,16 @@ | |
593 | return 0; | |
594 | } | |
595 | ||
596 | -/*static*/ void restart_transaction(struct reiserfs_transaction_handle *th, | |
597 | - struct inode *inode, struct path *path) { | |
598 | - struct super_block *s = th->t_super ; | |
599 | - int len = th->t_blocks_allocated ; | |
600 | - | |
601 | +static void restart_transaction(struct reiserfs_transaction_handle *th, | |
602 | + struct inode *inode, struct path *path, | |
603 | + int jbegin_count) { | |
604 | + /* we cannot restart while nested unless the parent allows it */ | |
605 | + if (!reiserfs_restartable_handle(th) && th->t_refcount > 1) { | |
606 | + return ; | |
607 | + } | |
608 | pathrelse(path) ; | |
609 | reiserfs_update_sd(th, inode) ; | |
610 | - journal_end(th, s, len) ; | |
611 | - journal_begin(th, s, len) ; | |
612 | + reiserfs_restart_transaction(th, jbegin_count) ; | |
613 | reiserfs_update_inode_transaction(inode) ; | |
614 | } | |
615 | ||
616 | @@ -327,6 +337,10 @@ | |
617 | } | |
618 | } | |
619 | p += offset ; | |
620 | + if ((offset + inode->i_sb->s_blocksize) > PAGE_CACHE_SIZE) { | |
621 | +printk("get_block_create_0 offset %lu too large\n", offset); | |
622 | + } | |
623 | + | |
624 | memset (p, 0, inode->i_sb->s_blocksize); | |
625 | do { | |
626 | if (!is_direct_le_ih (ih)) { | |
627 | @@ -421,10 +435,32 @@ | |
628 | static int reiserfs_get_block_direct_io (struct inode * inode, long block, | |
629 | struct buffer_head * bh_result, int create) { | |
630 | int ret ; | |
631 | - | |
632 | + struct reiserfs_transaction_handle *th; | |
633 | + int refcount = 0; | |
634 | + struct super_block *s = inode->i_sb; | |
635 | + | |
636 | + /* get_block might start a new transaction and leave it running. | |
637 | + * test for that by checking for a transaction running right now | |
638 | + * and recording its refcount. Run a journal_end if the refcount | |
639 | + * after reiserfs_get_block is higher than it was before. | |
640 | + */ | |
641 | + if (reiserfs_transaction_running(s)) { | |
642 | + th = current->journal_info; | |
643 | + refcount = th->t_refcount; | |
644 | + } | |
645 | bh_result->b_page = NULL; | |
646 | ret = reiserfs_get_block(inode, block, bh_result, create) ; | |
647 | ||
648 | + if (!ret && reiserfs_transaction_running(s)) { | |
649 | + th = current->journal_info; | |
650 | + if (th->t_refcount > refcount) { | |
651 | + lock_kernel(); | |
652 | + reiserfs_update_sd(th, inode) ; | |
653 | + journal_end(th, s, th->t_blocks_allocated); | |
654 | + unlock_kernel(); | |
655 | + } | |
656 | + } | |
657 | + | |
658 | /* don't allow direct io onto tail pages */ | |
659 | if (ret == 0 && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { | |
660 | /* make sure future calls to the direct io funcs for this offset | |
661 | @@ -459,7 +495,6 @@ | |
662 | struct buffer_head *bh_result, | |
663 | loff_t tail_offset) { | |
664 | unsigned long index ; | |
665 | - unsigned long tail_end ; | |
666 | unsigned long tail_start ; | |
667 | struct page * tail_page ; | |
668 | struct page * hole_page = bh_result->b_page ; | |
669 | @@ -470,7 +505,6 @@ | |
670 | ||
671 | /* always try to read until the end of the block */ | |
672 | tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ; | |
673 | - tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ; | |
674 | ||
675 | index = tail_offset >> PAGE_CACHE_SHIFT ; | |
676 | if ( !hole_page || index != hole_page->index) { | |
677 | @@ -492,16 +526,13 @@ | |
678 | ** data that has been read directly into the page, and block_prepare_write | |
679 | ** won't trigger a get_block in this case. | |
680 | */ | |
681 | - fix_tail_page_for_writing(tail_page) ; | |
682 | - retval = block_prepare_write(tail_page, tail_start, tail_end, | |
683 | - reiserfs_get_block) ; | |
684 | + retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_start) ; | |
685 | if (retval) | |
686 | goto unlock ; | |
687 | ||
688 | /* tail conversion might change the data in the page */ | |
689 | flush_dcache_page(tail_page) ; | |
690 | - | |
691 | - retval = generic_commit_write(NULL, tail_page, tail_start, tail_end) ; | |
692 | + retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_start) ; | |
693 | ||
694 | unlock: | |
695 | if (tail_page != hole_page) { | |
696 | @@ -541,20 +572,34 @@ | |
697 | int done; | |
698 | int fs_gen; | |
699 | int windex ; | |
700 | - struct reiserfs_transaction_handle th ; | |
701 | + struct reiserfs_transaction_handle *th = NULL ; | |
702 | /* space reserved in transaction batch: | |
703 | . 3 balancings in direct->indirect conversion | |
704 | . 1 block involved into reiserfs_update_sd() | |
705 | + . 1 bitmap block | |
706 | XXX in practically impossible worst case direct2indirect() | |
707 | - can incur (much) more that 3 balancings. */ | |
708 | - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1; | |
709 | + can incur (much) more that 3 balancings, but we deal with | |
710 | + direct2indirect lower down */ | |
711 | + int jbegin_count = JOURNAL_PER_BALANCE_CNT + 2; | |
712 | int version; | |
713 | - int transaction_started = 0 ; | |
714 | + int dangle = 1; | |
715 | loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ; | |
716 | + int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits; | |
717 | ||
718 | - /* bad.... */ | |
719 | + /* if this block might contain a tail, we need to be more conservative */ | |
720 | + if (new_offset <= (loff_t)(16 * 1024)) { | |
721 | + jbegin_count += JOURNAL_PER_BALANCE_CNT * 2; | |
722 | + } | |
723 | + /* we might nest for the entire page, so we need to make sure | |
724 | + * to reserve enough to insert pointers in the tree for each block | |
725 | + * in the file | |
726 | + */ | |
727 | + jbegin_count *= blocks_per_page; | |
728 | + if (reiserfs_file_data_log(inode)) { | |
729 | + jbegin_count += blocks_per_page; | |
730 | + | |
731 | + } | |
732 | lock_kernel() ; | |
733 | - th.t_trans_id = 0 ; | |
734 | version = get_inode_item_key_version (inode); | |
735 | ||
736 | if (block < 0) { | |
737 | @@ -579,6 +624,10 @@ | |
738 | return ret; | |
739 | } | |
740 | ||
741 | + /* don't leave the trans running if we are already nested */ | |
742 | + if (reiserfs_transaction_running(inode->i_sb)) | |
743 | + dangle = 0; | |
744 | + | |
745 | /* If file is of such a size, that it might have a tail and tails are enabled | |
746 | ** we should mark it as possibly needing tail packing on close | |
747 | */ | |
748 | @@ -591,10 +640,18 @@ | |
749 | /* set the key of the first byte in the 'block'-th block of file */ | |
750 | make_cpu_key (&key, inode, new_offset, | |
751 | TYPE_ANY, 3/*key length*/); | |
752 | + | |
753 | + /* reiserfs_commit_write will close any transaction currently | |
754 | + ** running. So, if we are nesting into someone else, we have to | |
755 | + ** make sure and bump the refcount | |
756 | + */ | |
757 | if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { | |
758 | - journal_begin(&th, inode->i_sb, jbegin_count) ; | |
759 | + th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count) ; | |
760 | + if (IS_ERR(th)) { | |
761 | + retval = PTR_ERR(th) ; | |
762 | + goto failure ; | |
763 | + } | |
764 | reiserfs_update_inode_transaction(inode) ; | |
765 | - transaction_started = 1 ; | |
766 | } | |
767 | research: | |
768 | ||
769 | @@ -614,28 +671,34 @@ | |
770 | ||
771 | if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) { | |
772 | /* we have to allocate block for the unformatted node */ | |
773 | - if (!transaction_started) { | |
774 | + if (!reiserfs_active_handle(th)) { | |
775 | pathrelse(&path) ; | |
776 | - journal_begin(&th, inode->i_sb, jbegin_count) ; | |
777 | + th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count) ; | |
778 | + if (IS_ERR(th)) { | |
779 | + retval = PTR_ERR(th) ; | |
780 | + goto failure ; | |
781 | + } | |
782 | reiserfs_update_inode_transaction(inode) ; | |
783 | - transaction_started = 1 ; | |
784 | goto research ; | |
785 | } | |
786 | ||
787 | - repeat = _allocate_block(&th, block, inode, &allocated_block_nr, &path, create); | |
788 | + repeat = _allocate_block(th, block, inode, &allocated_block_nr, &path, create); | |
789 | ||
790 | - if (repeat == NO_DISK_SPACE) { | |
791 | + if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { | |
792 | /* restart the transaction to give the journal a chance to free | |
793 | ** some blocks. releases the path, so we have to go back to | |
794 | ** research if we succeed on the second try | |
795 | */ | |
796 | - restart_transaction(&th, inode, &path) ; | |
797 | - repeat = _allocate_block(&th, block, inode, &allocated_block_nr, NULL, create); | |
798 | + restart_transaction(th, inode, &path, jbegin_count) ; | |
799 | + repeat = _allocate_block(th, block, inode, &allocated_block_nr, NULL, create); | |
800 | ||
801 | - if (repeat != NO_DISK_SPACE) { | |
802 | + if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) { | |
803 | goto research ; | |
804 | } | |
805 | - retval = -ENOSPC; | |
806 | + if (repeat == QUOTA_EXCEEDED) | |
807 | + retval = -EDQUOT; | |
808 | + else | |
809 | + retval = -ENOSPC; | |
810 | goto failure; | |
811 | } | |
812 | ||
813 | @@ -660,15 +723,12 @@ | |
814 | bh_result->b_state |= (1UL << BH_New); | |
815 | put_block_num(item, pos_in_item, allocated_block_nr) ; | |
816 | unfm_ptr = allocated_block_nr; | |
817 | - journal_mark_dirty (&th, inode->i_sb, bh); | |
818 | - inode->i_blocks += (inode->i_sb->s_blocksize / 512) ; | |
819 | - reiserfs_update_sd(&th, inode) ; | |
820 | + journal_mark_dirty (th, inode->i_sb, bh); | |
821 | + reiserfs_update_sd(th, inode) ; | |
822 | } | |
823 | set_block_dev_mapped(bh_result, unfm_ptr, inode); | |
824 | pathrelse (&path); | |
825 | pop_journal_writer(windex) ; | |
826 | - if (transaction_started) | |
827 | - journal_end(&th, inode->i_sb, jbegin_count) ; | |
828 | ||
829 | unlock_kernel() ; | |
830 | ||
831 | @@ -676,18 +736,23 @@ | |
832 | ** there is no need to make sure the inode is updated with this | |
833 | ** transaction | |
834 | */ | |
835 | + if (!dangle && reiserfs_active_handle(th)) | |
836 | + journal_end(th, inode->i_sb, jbegin_count) ; | |
837 | return 0; | |
838 | } | |
839 | ||
840 | - if (!transaction_started) { | |
841 | + if (!reiserfs_active_handle(th)) { | |
842 | /* if we don't pathrelse, we could vs-3050 on the buffer if | |
843 | ** someone is waiting for it (they can't finish until the buffer | |
844 | - ** is released, we can start a new transaction until they finish) | |
845 | + ** is released, we can't start a new transaction until they finish) | |
846 | */ | |
847 | pathrelse(&path) ; | |
848 | - journal_begin(&th, inode->i_sb, jbegin_count) ; | |
849 | + th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count) ; | |
850 | + if (IS_ERR(th)) { | |
851 | + retval = PTR_ERR(th) ; | |
852 | + goto failure ; | |
853 | + } | |
854 | reiserfs_update_inode_transaction(inode) ; | |
855 | - transaction_started = 1 ; | |
856 | goto research; | |
857 | } | |
858 | ||
859 | @@ -716,13 +781,11 @@ | |
860 | set_cpu_key_k_offset (&tmp_key, 1); | |
861 | PATH_LAST_POSITION(&path) ++; | |
862 | ||
863 | - retval = reiserfs_insert_item (&th, &path, &tmp_key, &tmp_ih, (char *)&unp); | |
864 | + retval = reiserfs_insert_item (th, &path, &tmp_key, &tmp_ih, inode, (char *)&unp); | |
865 | if (retval) { | |
866 | - reiserfs_free_block (&th, allocated_block_nr); | |
867 | - goto failure; // retval == -ENOSPC or -EIO or -EEXIST | |
868 | + reiserfs_free_block (th, inode, allocated_block_nr, 1); | |
869 | + goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST | |
870 | } | |
871 | - if (unp) | |
872 | - inode->i_blocks += inode->i_sb->s_blocksize / 512; | |
873 | //mark_tail_converted (inode); | |
874 | } else if (is_direct_le_ih (ih)) { | |
875 | /* direct item has to be converted */ | |
876 | @@ -742,8 +805,13 @@ | |
877 | node. FIXME: this should also get into page cache */ | |
878 | ||
879 | pathrelse(&path) ; | |
880 | - journal_end(&th, inode->i_sb, jbegin_count) ; | |
881 | - transaction_started = 0 ; | |
882 | + /* ugly, but we should only end the transaction if | |
883 | + ** we aren't nested | |
884 | + */ | |
885 | + if (th->t_refcount == 1) { | |
886 | + journal_end(th, inode->i_sb, jbegin_count) ; | |
887 | + th = NULL ; | |
888 | + } | |
889 | ||
890 | retval = convert_tail_for_hole(inode, bh_result, tail_offset) ; | |
891 | if (retval) { | |
892 | @@ -751,20 +819,27 @@ | |
893 | reiserfs_warning(inode->i_sb, "clm-6004: convert tail failed inode %lu, error %d\n", inode->i_ino, retval) ; | |
894 | if (allocated_block_nr) { | |
895 | /* the bitmap, the super, and the stat data == 3 */ | |
896 | - journal_begin(&th, inode->i_sb, 3) ; | |
897 | - reiserfs_free_block (&th, allocated_block_nr); | |
898 | - transaction_started = 1 ; | |
899 | + if (!reiserfs_active_handle(th)) { | |
900 | + th = reiserfs_persistent_transaction(inode->i_sb,3); | |
901 | + } | |
902 | + if (!IS_ERR(th)) { | |
903 | + reiserfs_free_block(th,inode,allocated_block_nr,1); | |
904 | + } | |
905 | + | |
906 | } | |
907 | goto failure ; | |
908 | } | |
909 | goto research ; | |
910 | } | |
911 | - retval = direct2indirect (&th, inode, &path, unbh, tail_offset); | |
912 | + retval = direct2indirect (th, inode, &path, unbh, tail_offset); | |
913 | if (retval) { | |
914 | reiserfs_unmap_buffer(unbh); | |
915 | - reiserfs_free_block (&th, allocated_block_nr); | |
916 | + reiserfs_free_block (th, inode, allocated_block_nr, 1); | |
917 | goto failure; | |
918 | } | |
919 | + | |
920 | + reiserfs_update_sd(th, inode) ; | |
921 | + | |
922 | /* it is important the mark_buffer_uptodate is done after | |
923 | ** the direct2indirect. The buffer might contain valid | |
924 | ** data newer than the data on disk (read by readpage, changed, | |
925 | @@ -775,24 +850,25 @@ | |
926 | */ | |
927 | mark_buffer_uptodate (unbh, 1); | |
928 | ||
929 | - /* unbh->b_page == NULL in case of DIRECT_IO request, this means | |
930 | - buffer will disappear shortly, so it should not be added to | |
931 | - any of our lists. | |
932 | + /* we've converted the tail, so we must | |
933 | + ** flush unbh before the transaction commits. | |
934 | + ** unbh->b_page will be NULL for direct io requests, and | |
935 | + ** in that case there's no data to log, dirty or order | |
936 | */ | |
937 | if ( unbh->b_page ) { | |
938 | - /* we've converted the tail, so we must | |
939 | - ** flush unbh before the transaction commits | |
940 | - */ | |
941 | - add_to_flushlist(inode, unbh) ; | |
942 | - | |
943 | - /* mark it dirty now to prevent commit_write from adding | |
944 | - ** this buffer to the inode's dirty buffer list | |
945 | - */ | |
946 | - __mark_buffer_dirty(unbh) ; | |
947 | + if (reiserfs_file_data_log(inode)) { | |
948 | + reiserfs_prepare_for_journal(inode->i_sb, unbh, 1) ; | |
949 | + journal_mark_dirty(th, inode->i_sb, unbh) ; | |
950 | + } else { | |
951 | + /* mark it dirty now to prevent commit_write from adding | |
952 | + ** this buffer to the inode's dirty buffer list | |
953 | + */ | |
954 | + __mark_buffer_dirty(unbh) ; | |
955 | + /* note, this covers the data=ordered case too */ | |
956 | + add_to_tail_list(inode, unbh) ; | |
957 | + } | |
958 | } | |
959 | ||
960 | - //inode->i_blocks += inode->i_sb->s_blocksize / 512; | |
961 | - //mark_tail_converted (inode); | |
962 | } else { | |
963 | /* append indirect item with holes if needed, when appending | |
964 | pointer to 'block'-th block use block, which is already | |
965 | @@ -840,18 +916,16 @@ | |
966 | only have space for one block */ | |
967 | blocks_needed=max_to_insert?max_to_insert:1; | |
968 | } | |
969 | - retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)un, UNFM_P_SIZE * blocks_needed); | |
970 | + retval = reiserfs_paste_into_item (th, &path, &tmp_key, inode, (char *)un, UNFM_P_SIZE * blocks_needed); | |
971 | ||
972 | if (blocks_needed != 1) | |
973 | kfree(un); | |
974 | ||
975 | if (retval) { | |
976 | - reiserfs_free_block (&th, allocated_block_nr); | |
977 | + reiserfs_free_block (th, inode, allocated_block_nr, 1); | |
978 | goto failure; | |
979 | } | |
980 | - if (done) { | |
981 | - inode->i_blocks += inode->i_sb->s_blocksize / 512; | |
982 | - } else { | |
983 | + if (!done) { | |
984 | /* We need to mark new file size in case this function will be | |
985 | interrupted/aborted later on. And we may do this only for | |
986 | holes. */ | |
987 | @@ -870,9 +944,12 @@ | |
988 | ** | |
989 | ** release the path so that anybody waiting on the path before | |
990 | ** ending their transaction will be able to continue. | |
991 | + ** | |
992 | + ** this only happens when inserting holes into the file, so it | |
993 | + ** does not affect data=ordered safety at all | |
994 | */ | |
995 | - if (journal_transaction_should_end(&th, th.t_blocks_allocated)) { | |
996 | - restart_transaction(&th, inode, &path) ; | |
997 | + if (journal_transaction_should_end(th, jbegin_count)) { | |
998 | + restart_transaction(th, inode, &path, jbegin_count) ; | |
999 | } | |
1000 | /* inserting indirect pointers for a hole can take a | |
1001 | ** long time. reschedule if needed | |
1002 | @@ -890,7 +967,7 @@ | |
1003 | "%K should not be found\n", &key); | |
1004 | retval = -EEXIST; | |
1005 | if (allocated_block_nr) | |
1006 | - reiserfs_free_block (&th, allocated_block_nr); | |
1007 | + reiserfs_free_block (th, inode, allocated_block_nr, 1); | |
1008 | pathrelse(&path) ; | |
1009 | goto failure; | |
1010 | } | |
1011 | @@ -902,20 +979,82 @@ | |
1012 | ||
1013 | ||
1014 | retval = 0; | |
1015 | - reiserfs_check_path(&path) ; | |
1016 | ||
1017 | failure: | |
1018 | - if (transaction_started) { | |
1019 | - reiserfs_update_sd(&th, inode) ; | |
1020 | - journal_end(&th, inode->i_sb, jbegin_count) ; | |
1021 | + pathrelse(&path) ; | |
1022 | + /* if we had an error, end the transaction */ | |
1023 | + if (!IS_ERR(th) && reiserfs_active_handle(th)) { | |
1024 | + if (retval != 0) { | |
1025 | + reiserfs_update_sd(th, inode) ; | |
1026 | + journal_end(th, inode->i_sb, jbegin_count) ; | |
1027 | + th = NULL ; | |
1028 | + } else if (!dangle) { | |
1029 | + journal_end(th, inode->i_sb, jbegin_count) ; | |
1030 | + th = NULL ; | |
1031 | + } | |
1032 | } | |
1033 | pop_journal_writer(windex) ; | |
1034 | + if (retval == 0 && reiserfs_active_handle(th) && | |
1035 | + current->journal_info != th) { | |
1036 | + BUG() ; | |
1037 | + } | |
1038 | unlock_kernel() ; | |
1039 | - reiserfs_check_path(&path) ; | |
1040 | return retval; | |
1041 | } | |
1042 | ||
1043 | ||
1044 | +/* Compute real number of used bytes by file | |
1045 | + * Following three functions can go away when we'll have enough space in stat item | |
1046 | + */ | |
1047 | +static int real_space_diff(struct inode *inode, int sd_size) | |
1048 | +{ | |
1049 | + int bytes; | |
1050 | + loff_t blocksize = inode->i_sb->s_blocksize ; | |
1051 | + | |
1052 | + if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) | |
1053 | + return sd_size ; | |
1054 | + | |
1055 | + /* End of file is also in full block with indirect reference, so round | |
1056 | + ** up to the next block. | |
1057 | + ** | |
1058 | + ** there is just no way to know if the tail is actually packed | |
1059 | + ** on the file, so we have to assume it isn't. When we pack the | |
1060 | + ** tail, we add 4 bytes to pretend there really is an unformatted | |
1061 | + ** node pointer | |
1062 | + */ | |
1063 | + bytes = ((inode->i_size + (blocksize-1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + sd_size; | |
1064 | + return bytes ; | |
1065 | +} | |
1066 | + | |
1067 | +static inline loff_t to_real_used_space(struct inode *inode, ulong blocks, | |
1068 | + int sd_size) | |
1069 | +{ | |
1070 | + if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { | |
1071 | + return inode->i_size + (loff_t)(real_space_diff(inode, sd_size)) ; | |
1072 | + } | |
1073 | + return ((loff_t)real_space_diff(inode, sd_size)) + (((loff_t)blocks) << 9); | |
1074 | +} | |
1075 | + | |
1076 | +/* Compute number of blocks used by file in ReiserFS counting */ | |
1077 | +static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) | |
1078 | +{ | |
1079 | + loff_t bytes = inode_get_bytes(inode) ; | |
1080 | + loff_t real_space = real_space_diff(inode, sd_size) ; | |
1081 | + | |
1082 | + /* keeps fsck and non-quota versions of reiserfs happy */ | |
1083 | + if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { | |
1084 | + bytes += (loff_t)511 ; | |
1085 | + } | |
1086 | + | |
1087 | + /* files from before the quota patch might i_blocks such that | |
1088 | + ** bytes < real_space. Deal with that here to prevent it from | |
1089 | + ** going negative. | |
1090 | + */ | |
1091 | + if (bytes < real_space) | |
1092 | + return 0 ; | |
1093 | + return (bytes - real_space) >> 9; | |
1094 | +} | |
1095 | + | |
1096 | // | |
1097 | // BAD: new directories have stat data of new type and all other items | |
1098 | // of old type. Version stored in the inode says about body items, so | |
1099 | @@ -971,6 +1110,14 @@ | |
1100 | ||
1101 | rdev = sd_v1_rdev(sd); | |
1102 | inode->u.reiserfs_i.i_first_direct_byte = sd_v1_first_direct_byte(sd); | |
1103 | + /* an early bug in the quota code can give us an odd number for the | |
1104 | + ** block count. This is incorrect, fix it here. | |
1105 | + */ | |
1106 | + if (inode->i_blocks & 1) { | |
1107 | + inode->i_blocks++ ; | |
1108 | + } | |
1109 | + inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks, | |
1110 | + SD_V1_SIZE)); | |
1111 | /* nopack is initially zero for v1 objects. For v2 objects, | |
1112 | nopack is initialised from sd_attrs */ | |
1113 | inode->u.reiserfs_i.i_flags &= ~i_nopack_mask; | |
1114 | @@ -1000,6 +1147,8 @@ | |
1115 | set_inode_item_key_version (inode, KEY_FORMAT_3_6); | |
1116 | ||
1117 | set_inode_sd_version (inode, STAT_DATA_V2); | |
1118 | + inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks, | |
1119 | + SD_V2_SIZE)); | |
1120 | /* read persistent inode attributes from sd and initalise | |
1121 | generic inode flags from them */ | |
1122 | inode -> u.reiserfs_i.i_attrs = sd_v2_attrs( sd ); | |
1123 | @@ -1026,7 +1175,7 @@ | |
1124 | ||
1125 | ||
1126 | // update new stat data with inode fields | |
1127 | -static void inode2sd (void * sd, struct inode * inode) | |
1128 | +static void inode2sd (void * sd, struct inode * inode, loff_t new_size) | |
1129 | { | |
1130 | struct stat_data * sd_v2 = (struct stat_data *)sd; | |
1131 | __u16 flags; | |
1132 | @@ -1034,12 +1183,12 @@ | |
1133 | set_sd_v2_mode(sd_v2, inode->i_mode ); | |
1134 | set_sd_v2_nlink(sd_v2, inode->i_nlink ); | |
1135 | set_sd_v2_uid(sd_v2, inode->i_uid ); | |
1136 | - set_sd_v2_size(sd_v2, inode->i_size ); | |
1137 | + set_sd_v2_size(sd_v2, new_size); | |
1138 | set_sd_v2_gid(sd_v2, inode->i_gid ); | |
1139 | set_sd_v2_mtime(sd_v2, inode->i_mtime ); | |
1140 | set_sd_v2_atime(sd_v2, inode->i_atime ); | |
1141 | set_sd_v2_ctime(sd_v2, inode->i_ctime ); | |
1142 | - set_sd_v2_blocks(sd_v2, inode->i_blocks ); | |
1143 | + set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); | |
1144 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) | |
1145 | set_sd_v2_rdev(sd_v2, inode->i_rdev ); | |
1146 | else | |
1147 | @@ -1051,7 +1200,7 @@ | |
1148 | ||
1149 | ||
1150 | // used to copy inode's fields to old stat data | |
1151 | -static void inode2sd_v1 (void * sd, struct inode * inode) | |
1152 | +static void inode2sd_v1 (void * sd, struct inode * inode, loff_t new_size) | |
1153 | { | |
1154 | struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd; | |
1155 | ||
1156 | @@ -1059,7 +1208,7 @@ | |
1157 | set_sd_v1_uid(sd_v1, inode->i_uid ); | |
1158 | set_sd_v1_gid(sd_v1, inode->i_gid ); | |
1159 | set_sd_v1_nlink(sd_v1, inode->i_nlink ); | |
1160 | - set_sd_v1_size(sd_v1, inode->i_size ); | |
1161 | + set_sd_v1_size(sd_v1, new_size); | |
1162 | set_sd_v1_atime(sd_v1, inode->i_atime ); | |
1163 | set_sd_v1_ctime(sd_v1, inode->i_ctime ); | |
1164 | set_sd_v1_mtime(sd_v1, inode->i_mtime ); | |
1165 | @@ -1067,7 +1216,7 @@ | |
1166 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) | |
1167 | set_sd_v1_rdev(sd_v1, inode->i_rdev ); | |
1168 | else | |
1169 | - set_sd_v1_blocks(sd_v1, inode->i_blocks ); | |
1170 | + set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); | |
1171 | ||
1172 | // Sigh. i_first_direct_byte is back | |
1173 | set_sd_v1_first_direct_byte(sd_v1, inode->u.reiserfs_i.i_first_direct_byte); | |
1174 | @@ -1077,7 +1226,8 @@ | |
1175 | /* NOTE, you must prepare the buffer head before sending it here, | |
1176 | ** and then log it after the call | |
1177 | */ | |
1178 | -static void update_stat_data (struct path * path, struct inode * inode) | |
1179 | +static void update_stat_data (struct path * path, struct inode * inode, | |
1180 | + loff_t new_size) | |
1181 | { | |
1182 | struct buffer_head * bh; | |
1183 | struct item_head * ih; | |
1184 | @@ -1091,17 +1241,16 @@ | |
1185 | ||
1186 | if (stat_data_v1 (ih)) { | |
1187 | // path points to old stat data | |
1188 | - inode2sd_v1 (B_I_PITEM (bh, ih), inode); | |
1189 | + inode2sd_v1 (B_I_PITEM (bh, ih), inode, new_size); | |
1190 | } else { | |
1191 | - inode2sd (B_I_PITEM (bh, ih), inode); | |
1192 | + inode2sd (B_I_PITEM (bh, ih), inode, new_size); | |
1193 | } | |
1194 | ||
1195 | return; | |
1196 | } | |
1197 | ||
1198 | - | |
1199 | -void reiserfs_update_sd (struct reiserfs_transaction_handle *th, | |
1200 | - struct inode * inode) | |
1201 | +void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, | |
1202 | + struct inode *inode, loff_t new_size) | |
1203 | { | |
1204 | struct cpu_key key; | |
1205 | INITIALIZE_PATH(path); | |
1206 | @@ -1151,7 +1300,7 @@ | |
1207 | } | |
1208 | break; | |
1209 | } | |
1210 | - update_stat_data (&path, inode); | |
1211 | + update_stat_data (&path, inode, new_size); | |
1212 | journal_mark_dirty(th, th->t_super, bh) ; | |
1213 | pathrelse (&path); | |
1214 | return; | |
1215 | @@ -1236,6 +1385,7 @@ | |
1216 | reiserfs_make_bad_inode( inode ); | |
1217 | } | |
1218 | ||
1219 | + reiserfs_update_inode_transaction(inode); | |
1220 | reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */ | |
1221 | ||
1222 | } | |
1223 | @@ -1415,8 +1565,6 @@ | |
1224 | ** does something when called for a synchronous update. | |
1225 | */ | |
1226 | void reiserfs_write_inode (struct inode * inode, int do_sync) { | |
1227 | - struct reiserfs_transaction_handle th ; | |
1228 | - int jbegin_count = 1 ; | |
1229 | ||
1230 | if (inode->i_sb->s_flags & MS_RDONLY) { | |
1231 | reiserfs_warning(inode->i_sb, "clm-6005: writing inode %lu on readonly FS\n", | |
1232 | @@ -1430,9 +1578,7 @@ | |
1233 | */ | |
1234 | if (do_sync && !(current->flags & PF_MEMALLOC)) { | |
1235 | lock_kernel() ; | |
1236 | - journal_begin(&th, inode->i_sb, jbegin_count) ; | |
1237 | - reiserfs_update_sd (&th, inode); | |
1238 | - journal_end_sync(&th, inode->i_sb, jbegin_count) ; | |
1239 | + reiserfs_commit_for_inode(inode) ; | |
1240 | unlock_kernel() ; | |
1241 | } | |
1242 | } | |
1243 | @@ -1450,6 +1596,7 @@ | |
1244 | /* stat data of new object is inserted already, this inserts the item | |
1245 | containing "." and ".." entries */ | |
1246 | static int reiserfs_new_directory (struct reiserfs_transaction_handle *th, | |
1247 | + struct inode *inode, | |
1248 | struct item_head * ih, struct path * path, | |
1249 | const struct inode * dir) | |
1250 | { | |
1251 | @@ -1494,13 +1641,14 @@ | |
1252 | } | |
1253 | ||
1254 | /* insert item, that is empty directory item */ | |
1255 | - return reiserfs_insert_item (th, path, &key, ih, body); | |
1256 | + return reiserfs_insert_item (th, path, &key, ih, inode, body); | |
1257 | } | |
1258 | ||
1259 | ||
1260 | /* stat data of object has been inserted, this inserts the item | |
1261 | containing the body of symlink */ | |
1262 | static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th, | |
1263 | + struct inode *inode, /* Inode of symlink */ | |
1264 | struct item_head * ih, | |
1265 | struct path * path, const char * symname, int item_len) | |
1266 | { | |
1267 | @@ -1530,7 +1678,7 @@ | |
1268 | } | |
1269 | ||
1270 | /* insert item, that is body of symlink */ | |
1271 | - return reiserfs_insert_item (th, path, &key, ih, symname); | |
1272 | + return reiserfs_insert_item (th, path, &key, ih, inode, symname); | |
1273 | } | |
1274 | ||
1275 | ||
1276 | @@ -1604,7 +1752,8 @@ | |
1277 | ||
1278 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | |
1279 | inode->i_size = i_size; | |
1280 | - inode->i_blocks = (inode->i_size + 511) >> 9; | |
1281 | + inode->i_blocks = 0; | |
1282 | + inode->i_bytes = 0; | |
1283 | inode->u.reiserfs_i.i_first_direct_byte = S_ISLNK(mode) ? 1 : | |
1284 | U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/; | |
1285 | ||
1286 | @@ -1638,9 +1787,9 @@ | |
1287 | err = -EINVAL; | |
1288 | goto out_bad_inode; | |
1289 | } | |
1290 | - inode2sd_v1 (&sd, inode); | |
1291 | + inode2sd_v1 (&sd, inode, inode->i_size); | |
1292 | } else | |
1293 | - inode2sd (&sd, inode); | |
1294 | + inode2sd (&sd, inode, inode->i_size); | |
1295 | ||
1296 | // these do not go to on-disk stat data | |
1297 | inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid); | |
1298 | @@ -1665,7 +1814,7 @@ | |
1299 | if (dir->u.reiserfs_i.new_packing_locality) | |
1300 | th->displace_new_blocks = 1; | |
1301 | #endif | |
1302 | - retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd)); | |
1303 | + retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, inode, (char *)(&sd)); | |
1304 | if (retval) { | |
1305 | reiserfs_check_path(&path_to_key) ; | |
1306 | err = retval; | |
1307 | @@ -1678,14 +1827,14 @@ | |
1308 | #endif | |
1309 | if (S_ISDIR(mode)) { | |
1310 | /* insert item with "." and ".." */ | |
1311 | - retval = reiserfs_new_directory (th, &ih, &path_to_key, dir); | |
1312 | + retval = reiserfs_new_directory (th, inode, &ih, &path_to_key, dir); | |
1313 | } | |
1314 | ||
1315 | if (S_ISLNK(mode)) { | |
1316 | /* insert body of symlink */ | |
1317 | if (!old_format_only (sb)) | |
1318 | i_size = ROUND_UP(i_size); | |
1319 | - retval = reiserfs_new_symlink (th, &ih, &path_to_key, symname, i_size); | |
1320 | + retval = reiserfs_new_symlink (th, inode, &ih, &path_to_key, symname, i_size); | |
1321 | } | |
1322 | if (retval) { | |
1323 | err = retval; | |
1324 | @@ -1705,6 +1854,9 @@ | |
1325 | ||
1326 | /* dquot_drop must be done outside a transaction */ | |
1327 | journal_end(th, th->t_super, th->t_blocks_allocated) ; | |
1328 | + DQUOT_FREE_INODE(inode); | |
1329 | + DQUOT_DROP(inode); | |
1330 | + inode->i_flags |= S_NOQUOTA; | |
1331 | make_bad_inode(inode); | |
1332 | ||
1333 | out_inserted_sd: | |
1334 | @@ -1816,6 +1968,7 @@ | |
1335 | unsigned length ; | |
1336 | struct page *page = NULL ; | |
1337 | int error ; | |
1338 | + int need_balance_dirty = 0 ; | |
1339 | struct buffer_head *bh = NULL ; | |
1340 | ||
1341 | if (p_s_inode->i_size > 0) { | |
1342 | @@ -1848,34 +2001,58 @@ | |
1343 | transaction of truncating gets committed - on reboot the file | |
1344 | either appears truncated properly or not truncated at all */ | |
1345 | add_save_link (&th, p_s_inode, 1); | |
1346 | + if (page) | |
1347 | + kmap(page); | |
1348 | reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ; | |
1349 | pop_journal_writer(windex) ; | |
1350 | - journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ; | |
1351 | - | |
1352 | - if (update_timestamps) | |
1353 | - remove_save_link (p_s_inode, 1/* truncate */); | |
1354 | ||
1355 | if (page) { | |
1356 | + if (!PageLocked(page)) | |
1357 | + BUG(); | |
1358 | length = offset & (blocksize - 1) ; | |
1359 | /* if we are not on a block boundary */ | |
1360 | if (length) { | |
1361 | length = blocksize - length ; | |
1362 | - memset((char *)kmap(page) + offset, 0, length) ; | |
1363 | + if ((offset + length) > PAGE_CACHE_SIZE) { | |
1364 | + BUG(); | |
1365 | + } | |
1366 | + memset((char *)page_address(page) + offset, 0, length) ; | |
1367 | flush_dcache_page(page) ; | |
1368 | - kunmap(page) ; | |
1369 | if (buffer_mapped(bh) && bh->b_blocknr != 0) { | |
1370 | - if (!atomic_set_buffer_dirty(bh)) { | |
1371 | + if (reiserfs_file_data_log(p_s_inode)) { | |
1372 | + reiserfs_prepare_for_journal(p_s_inode->i_sb, bh, 1) ; | |
1373 | + journal_mark_dirty(&th, p_s_inode->i_sb, bh) ; | |
1374 | + } else { | |
1375 | + /* it is safe to block here, but it would be faster | |
1376 | + ** to balance dirty after the journal lock is dropped | |
1377 | + */ | |
1378 | + if (!atomic_set_buffer_dirty(bh)) { | |
1379 | set_buffer_flushtime(bh); | |
1380 | refile_buffer(bh); | |
1381 | buffer_insert_inode_data_queue(bh, p_s_inode); | |
1382 | - balance_dirty(); | |
1383 | + need_balance_dirty = 1; | |
1384 | + | |
1385 | + if (reiserfs_data_ordered(p_s_inode->i_sb)) { | |
1386 | + add_to_flushlist(p_s_inode, bh) ; | |
1387 | + } | |
1388 | + } | |
1389 | } | |
1390 | } | |
1391 | } | |
1392 | + kunmap(page); | |
1393 | + } | |
1394 | + journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1) ; | |
1395 | + | |
1396 | + if (update_timestamps) | |
1397 | + remove_save_link(p_s_inode, 1/* truncate */); | |
1398 | + | |
1399 | + if (page) { | |
1400 | UnlockPage(page) ; | |
1401 | page_cache_release(page) ; | |
1402 | } | |
1403 | - | |
1404 | + if (need_balance_dirty) { | |
1405 | + balance_dirty() ; | |
1406 | + } | |
1407 | return ; | |
1408 | } | |
1409 | ||
1410 | @@ -1944,6 +2121,8 @@ | |
1411 | goto research; | |
1412 | } | |
1413 | ||
1414 | + if (((B_I_PITEM(bh, ih) - bh->b_data) + pos_in_item + copy_size) > inode->i_sb->s_blocksize) | |
1415 | + BUG(); | |
1416 | memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ; | |
1417 | ||
1418 | journal_mark_dirty(&th, inode->i_sb, bh) ; | |
1419 | @@ -1971,9 +2150,37 @@ | |
1420 | ||
1421 | /* this is where we fill in holes in the file. */ | |
1422 | if (use_get_block) { | |
1423 | + int old_refcount = 0 ; | |
1424 | + struct reiserfs_transaction_handle *hole_th ; | |
1425 | + if (reiserfs_transaction_running(inode->i_sb)) { | |
1426 | + hole_th = current->journal_info ; | |
1427 | + old_refcount = hole_th->t_refcount ; | |
1428 | + } | |
1429 | retval = reiserfs_get_block(inode, block, bh_result, | |
1430 | GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM) ; | |
1431 | if (!retval) { | |
1432 | + /* did reiserfs_get_block leave us a running transaction? */ | |
1433 | + if (reiserfs_transaction_running(inode->i_sb)) { | |
1434 | + hole_th = current->journal_info ; | |
1435 | + if (old_refcount < hole_th->t_refcount) { | |
1436 | + lock_kernel() ; | |
1437 | + /* we've filled a hole, make sure the new block | |
1438 | + * gets to disk before transaction commit | |
1439 | + */ | |
1440 | + if (buffer_mapped(bh_result) && bh_result->b_blocknr != 0 && | |
1441 | + reiserfs_data_ordered(inode->i_sb)) | |
1442 | + { | |
1443 | + __mark_buffer_dirty(bh_result) ; | |
1444 | + mark_buffer_uptodate(bh_result, 1); | |
1445 | + /* no need to update the inode trans, already done */ | |
1446 | + add_to_flushlist(inode, bh_result) ; | |
1447 | + } | |
1448 | + reiserfs_update_sd(hole_th, inode) ; | |
1449 | + journal_end(hole_th, hole_th->t_super, | |
1450 | + hole_th->t_blocks_allocated) ; | |
1451 | + unlock_kernel() ; | |
1452 | + } | |
1453 | + } | |
1454 | if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) { | |
1455 | /* get_block failed to find a mapped unformatted node. */ | |
1456 | use_get_block = 0 ; | |
1457 | @@ -1988,33 +2195,41 @@ | |
1458 | /* helper func to get a buffer head ready for writepage to send to | |
1459 | ** ll_rw_block | |
1460 | */ | |
1461 | -static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) { | |
1462 | +static void submit_bh_for_writepage(struct page *page, | |
1463 | + struct buffer_head **bhp, int nr) { | |
1464 | struct buffer_head *bh ; | |
1465 | int i; | |
1466 | ||
1467 | - /* lock them all first so the end_io handler doesn't unlock the page | |
1468 | - ** too early | |
1469 | + /* lock them all first so the end_io handler doesn't | |
1470 | + ** unlock too early | |
1471 | + ** | |
1472 | + ** There's just no safe way to log the buffers during writepage, | |
1473 | + ** we'll deadlock if kswapd tries to start a transaction. | |
1474 | + ** | |
1475 | + ** There's also no useful way to tie them to a specific transaction, | |
1476 | + ** so we just don't bother. | |
1477 | */ | |
1478 | for(i = 0 ; i < nr ; i++) { | |
1479 | - bh = bhp[i] ; | |
1480 | - lock_buffer(bh) ; | |
1481 | - set_buffer_async_io(bh) ; | |
1482 | + bh = bhp[i] ; | |
1483 | + lock_buffer(bh); | |
1484 | + set_buffer_async_io(bh); | |
1485 | + set_bit(BH_Uptodate, &bh->b_state) ; | |
1486 | } | |
1487 | for(i = 0 ; i < nr ; i++) { | |
1488 | + bh = bhp[i] ; | |
1489 | /* submit_bh doesn't care if the buffer is dirty, but nobody | |
1490 | ** later on in the call chain will be cleaning it. So, we | |
1491 | ** clean the buffer here, it still gets written either way. | |
1492 | */ | |
1493 | - bh = bhp[i] ; | |
1494 | clear_bit(BH_Dirty, &bh->b_state) ; | |
1495 | - set_bit(BH_Uptodate, &bh->b_state) ; | |
1496 | submit_bh(WRITE, bh) ; | |
1497 | } | |
1498 | } | |
1499 | ||
1500 | static int reiserfs_write_full_page(struct page *page) { | |
1501 | struct inode *inode = page->mapping->host ; | |
1502 | - unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ; | |
1503 | + loff_t size = inode->i_size; | |
1504 | + unsigned long end_index = size >> PAGE_CACHE_SHIFT ; | |
1505 | unsigned last_offset = PAGE_CACHE_SIZE; | |
1506 | int error = 0; | |
1507 | unsigned long block ; | |
1508 | @@ -2024,21 +2239,36 @@ | |
1509 | struct buffer_head *arr[PAGE_CACHE_SIZE/512] ; | |
1510 | int nr = 0 ; | |
1511 | ||
1512 | + if (reiserfs_transaction_running(inode->i_sb)) { | |
1513 | + BUG(); | |
1514 | + } | |
1515 | + | |
1516 | + if (!PageLocked(page)) | |
1517 | + BUG(); | |
1518 | + | |
1519 | if (!page->buffers) { | |
1520 | block_prepare_write(page, 0, 0, NULL) ; | |
1521 | kunmap(page) ; | |
1522 | } | |
1523 | + | |
1524 | + if (reiserfs_transaction_running(inode->i_sb)) { | |
1525 | + BUG(); | |
1526 | + } | |
1527 | /* last page in the file, zero out any contents past the | |
1528 | ** last byte in the file | |
1529 | */ | |
1530 | if (page->index >= end_index) { | |
1531 | - last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ; | |
1532 | + char *p ; | |
1533 | + last_offset = size & (PAGE_CACHE_SIZE - 1) ; | |
1534 | /* no file contents in this page */ | |
1535 | if (page->index >= end_index + 1 || !last_offset) { | |
1536 | error = -EIO ; | |
1537 | goto fail ; | |
1538 | } | |
1539 | - memset((char *)kmap(page)+last_offset, 0, PAGE_CACHE_SIZE-last_offset) ; | |
1540 | + p = kmap(page); | |
1541 | + if (last_offset > PAGE_CACHE_SIZE) | |
1542 | + BUG(); | |
1543 | + memset(p + last_offset, 0, PAGE_CACHE_SIZE-last_offset) ; | |
1544 | flush_dcache_page(page) ; | |
1545 | kunmap(page) ; | |
1546 | } | |
1547 | @@ -2079,7 +2309,7 @@ | |
1548 | ** nr == 0 without there being any kind of error. | |
1549 | */ | |
1550 | if (nr) { | |
1551 | - submit_bh_for_writepage(arr, nr) ; | |
1552 | + submit_bh_for_writepage(page, arr, nr) ; | |
1553 | wakeup_page_waiters(page); | |
1554 | } else { | |
1555 | UnlockPage(page) ; | |
1556 | @@ -2091,7 +2321,7 @@ | |
1557 | ||
1558 | fail: | |
1559 | if (nr) { | |
1560 | - submit_bh_for_writepage(arr, nr) ; | |
1561 | + submit_bh_for_writepage(page, arr, nr) ; | |
1562 | } else { | |
1563 | UnlockPage(page) ; | |
1564 | } | |
1565 | @@ -2116,10 +2346,46 @@ | |
1566 | ||
1567 | int reiserfs_prepare_write(struct file *f, struct page *page, | |
1568 | unsigned from, unsigned to) { | |
1569 | + int cur_refcount = 0 ; | |
1570 | + int ret ; | |
1571 | struct inode *inode = page->mapping->host ; | |
1572 | + struct reiserfs_transaction_handle *th ; | |
1573 | + | |
1574 | reiserfs_wait_on_write_block(inode->i_sb) ; | |
1575 | fix_tail_page_for_writing(page) ; | |
1576 | - return block_prepare_write(page, from, to, reiserfs_get_block) ; | |
1577 | + | |
1578 | + /* we look for a running transaction before the block_prepare_write | |
1579 | + ** call, and then again afterwards. This lets us know if | |
1580 | + ** reiserfs_get_block added any additional transactions, so we can | |
1581 | + ** let reiserfs_commit_write know if he needs to close them. | |
1582 | + ** this is just nasty | |
1583 | + */ | |
1584 | + if (reiserfs_transaction_running(inode->i_sb)) { | |
1585 | + th = current->journal_info ; | |
1586 | + cur_refcount = th->t_refcount ; | |
1587 | + } | |
1588 | + ret = block_prepare_write(page, from, to, reiserfs_get_block) ; | |
1589 | + | |
1590 | + /* it is very important that we only set the dangling bit when | |
1591 | + ** there is no chance of additional nested transactions. | |
1592 | + */ | |
1593 | + if (reiserfs_transaction_running(inode->i_sb)) { | |
1594 | + th = current->journal_info ; | |
1595 | + if (th->t_refcount > cur_refcount) { | |
1596 | + /* if we return an error, commit_write isn't going to get called | |
1597 | + * we need to make sure we end any transactions | |
1598 | + * reiserfs_get_block left hanging around | |
1599 | + */ | |
1600 | + if (ret) { | |
1601 | + lock_kernel(); | |
1602 | + journal_end(th, th->t_super, th->t_blocks_allocated) ; | |
1603 | + unlock_kernel(); | |
1604 | + } else { | |
1605 | + reiserfs_set_handle_dangling(th) ; | |
1606 | + } | |
1607 | + } | |
1608 | + } | |
1609 | + return ret ; | |
1610 | } | |
1611 | ||
1612 | ||
1613 | @@ -2127,20 +2393,96 @@ | |
1614 | return generic_block_bmap(as, block, reiserfs_bmap) ; | |
1615 | } | |
1616 | ||
1617 | +/* taken from fs/buffer.c */ | |
1618 | +static int __commit_write(struct reiserfs_transaction_handle *th, | |
1619 | + struct inode *inode, struct page *page, | |
1620 | + unsigned from, unsigned to, int *balance) | |
1621 | +{ | |
1622 | + unsigned block_start, block_end; | |
1623 | + int partial = 0; | |
1624 | + unsigned blocksize; | |
1625 | + struct buffer_head *bh, *head; | |
1626 | + int logbh = 0 ; | |
1627 | + | |
1628 | + blocksize = 1 << inode->i_blkbits; | |
1629 | + if (reiserfs_file_data_log(inode)) { | |
1630 | + logbh = 1 ; | |
1631 | + lock_kernel() ; | |
1632 | + /* one for each block + the stat data, the caller closes the handle */ | |
1633 | + journal_begin(th, inode->i_sb,(PAGE_CACHE_SIZE >> inode->i_blkbits)+1); | |
1634 | + reiserfs_update_inode_transaction(inode) ; | |
1635 | + unlock_kernel() ; | |
1636 | + } | |
1637 | + | |
1638 | + for(bh = head = page->buffers, block_start = 0; | |
1639 | + bh != head || !block_start; | |
1640 | + block_start=block_end, bh = bh->b_this_page) { | |
1641 | + block_end = block_start + blocksize; | |
1642 | + if (block_end <= from || block_start >= to) { | |
1643 | + if (!buffer_uptodate(bh)) | |
1644 | + partial = 1; | |
1645 | + } else { | |
1646 | + set_bit(BH_Uptodate, &bh->b_state); | |
1647 | + if (logbh) { | |
1648 | + lock_kernel() ; | |
1649 | + reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; | |
1650 | + journal_mark_dirty (th, inode->i_sb, bh); | |
1651 | + unlock_kernel() ; | |
1652 | + } else if (!atomic_set_buffer_dirty(bh)) { | |
1653 | + __mark_dirty(bh); | |
1654 | + if (reiserfs_data_ordered(inode->i_sb)) { | |
1655 | + lock_kernel(); | |
1656 | + add_to_flushlist(inode, bh); | |
1657 | + /* if we don't update the inode trans information, | |
1658 | + * an fsync(fd) might not catch these data blocks | |
1659 | + */ | |
1660 | + reiserfs_update_inode_transaction(inode); | |
1661 | + unlock_kernel(); | |
1662 | + } else { | |
1663 | + buffer_insert_inode_data_queue(bh, inode); | |
1664 | + } | |
1665 | + *balance = 1; | |
1666 | + } | |
1667 | + } | |
1668 | + } | |
1669 | + | |
1670 | + /* | |
1671 | + * is this a partial write that happened to make all buffers | |
1672 | + * uptodate then we can optimize away a bogus readpage() for | |
1673 | + * the next read(). Here we 'discover' wether the page went | |
1674 | + * uptodate as a result of this (potentially partial) write. | |
1675 | + */ | |
1676 | + if (!partial) | |
1677 | + SetPageUptodate(page); | |
1678 | + return 0; | |
1679 | +} | |
1680 | + | |
1681 | static int reiserfs_commit_write(struct file *f, struct page *page, | |
1682 | unsigned from, unsigned to) { | |
1683 | struct inode *inode = page->mapping->host ; | |
1684 | loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; | |
1685 | int ret ; | |
1686 | - | |
1687 | + int need_balance = 0; | |
1688 | + struct reiserfs_transaction_handle th ; | |
1689 | + struct reiserfs_transaction_handle *dth = NULL ; | |
1690 | + | |
1691 | + /* we must do this before anything that might nest a transaction or | |
1692 | + ** mess with the handle flags | |
1693 | + */ | |
1694 | + if (reiserfs_transaction_running(inode->i_sb)) { | |
1695 | + dth = current->journal_info ; | |
1696 | + if (reiserfs_dangling_handle(dth)) { | |
1697 | + reiserfs_clear_handle_dangling(dth) ; | |
1698 | + } else { | |
1699 | + dth = NULL ; | |
1700 | + } | |
1701 | + } | |
1702 | reiserfs_wait_on_write_block(inode->i_sb) ; | |
1703 | + | |
1704 | + th.t_flags = 0 ; | |
1705 | + ret = __commit_write(&th, inode, page, from, to, &need_balance) ; | |
1706 | ||
1707 | - /* generic_commit_write does this for us, but does not update the | |
1708 | - ** transaction tracking stuff when the size changes. So, we have | |
1709 | - ** to do the i_size updates here. | |
1710 | - */ | |
1711 | if (pos > inode->i_size) { | |
1712 | - struct reiserfs_transaction_handle th ; | |
1713 | lock_kernel(); | |
1714 | /* If the file have grown beyond the border where it | |
1715 | can have a tail, unmark it as needing a tail | |
1716 | @@ -2149,24 +2491,135 @@ | |
1717 | (have_small_tails (inode->i_sb) && inode->i_size > block_size(inode)) ) | |
1718 | inode->u.reiserfs_i.i_flags &= ~i_pack_on_close_mask; | |
1719 | ||
1720 | - journal_begin(&th, inode->i_sb, 1) ; | |
1721 | + if (!reiserfs_active_handle(&th)) { | |
1722 | + journal_begin(&th, inode->i_sb, 1) ; | |
1723 | + } | |
1724 | reiserfs_update_inode_transaction(inode) ; | |
1725 | inode->i_size = pos ; | |
1726 | reiserfs_update_sd(&th, inode) ; | |
1727 | - journal_end(&th, inode->i_sb, 1) ; | |
1728 | - unlock_kernel(); | |
1729 | + journal_end(&th, th.t_super, th.t_blocks_allocated) ; | |
1730 | + unlock_kernel() ; | |
1731 | + } else if (reiserfs_active_handle(&th)) { | |
1732 | + /* in case commit_write left one running and the i_size update did | |
1733 | + ** not close it | |
1734 | + */ | |
1735 | + lock_kernel() ; | |
1736 | + journal_end(&th, th.t_super, th.t_blocks_allocated) ; | |
1737 | + unlock_kernel() ; | |
1738 | } | |
1739 | - | |
1740 | - ret = generic_commit_write(f, page, from, to) ; | |
1741 | ||
1742 | - /* we test for O_SYNC here so we can commit the transaction | |
1743 | - ** for any packed tails the file might have had | |
1744 | + /* did reiserfs_get_block leave us with a running transaction? | |
1745 | */ | |
1746 | - if (f && (f->f_flags & O_SYNC)) { | |
1747 | + if (dth) { | |
1748 | lock_kernel() ; | |
1749 | - reiserfs_commit_for_inode(inode) ; | |
1750 | + journal_end(dth, dth->t_super, dth->t_blocks_allocated) ; | |
1751 | unlock_kernel(); | |
1752 | } | |
1753 | + | |
1754 | + kunmap(page) ; | |
1755 | + | |
1756 | + if (need_balance) | |
1757 | + balance_dirty(); | |
1758 | + | |
1759 | + return ret ; | |
1760 | +} | |
1761 | + | |
1762 | +/* decide if this buffer needs to stay around for data logging or ordered | |
1763 | +** write purposes | |
1764 | +*/ | |
1765 | +static int flushpage_can_drop(struct inode *inode, struct buffer_head *bh) { | |
1766 | + int ret = 1 ; | |
1767 | + | |
1768 | + if (!buffer_mapped(bh)) { | |
1769 | + return 1 ; | |
1770 | + } | |
1771 | + if (reiserfs_file_data_log(inode)) { | |
1772 | + lock_kernel() ; | |
1773 | + /* very conservative, leave the buffer pinned if anyone might need it. | |
1774 | + ** this should be changed to drop the buffer if it is only in the | |
1775 | + ** current transaction | |
1776 | + */ | |
1777 | + if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { | |
1778 | + ret = 0 ; | |
1779 | + } | |
1780 | + unlock_kernel() ; | |
1781 | + } | |
1782 | + if (reiserfs_data_ordered(inode->i_sb)) { | |
1783 | + if (buffer_dirty(bh) && bh->b_journal_head) { | |
1784 | + struct reiserfs_journal_list *jl = NULL; | |
1785 | + lock_kernel(); | |
1786 | + | |
1787 | + /* we can race against fsync_inode_buffers if we aren't careful */ | |
1788 | + if (buffer_attached(bh) && buffer_dirty(bh)) | |
1789 | + jl = bh->b_journal_head; | |
1790 | + | |
1791 | + /* why is this safe? | |
1792 | + * reiserfs_setattr updates i_size in the on disk | |
1793 | + * stat data before allowing vmtruncate to be called. | |
1794 | + * | |
1795 | + * If buffer was put onto the ordered list for this | |
1796 | + * transaction, we know for sure either this transaction | |
1797 | + * or an older one already has updated i_size on disk, | |
1798 | + * and this ordered data won't be referenced in the file | |
1799 | + * if we crash. | |
1800 | + * | |
1801 | + * if the buffer was put onto the ordered list for an older | |
1802 | + * transaction, we need to leave it around | |
1803 | + */ | |
1804 | + if (jl != SB_JOURNAL(inode->i_sb)->j_current_jl) { | |
1805 | + ret = 0; | |
1806 | + } | |
1807 | + unlock_kernel(); | |
1808 | + } | |
1809 | + } | |
1810 | + return ret ; | |
1811 | +} | |
1812 | + | |
1813 | +/* stolen from fs/buffer.c:discard_bh_page */ | |
1814 | +static int reiserfs_flushpage(struct page *page, unsigned long offset) { | |
1815 | + struct buffer_head *head, *bh, *next; | |
1816 | + struct inode *inode = page->mapping->host ; | |
1817 | + unsigned int curr_off = 0; | |
1818 | + int ret = 1; | |
1819 | + | |
1820 | + if (!PageLocked(page)) | |
1821 | + BUG(); | |
1822 | + if (!page->buffers) | |
1823 | + return 1; | |
1824 | + | |
1825 | + head = page->buffers; | |
1826 | + bh = head; | |
1827 | + do { | |
1828 | + unsigned int next_off = curr_off + bh->b_size; | |
1829 | + next = bh->b_this_page; | |
1830 | + | |
1831 | + /* is this buffer to be completely truncated away? */ | |
1832 | + if (offset <= curr_off) { | |
1833 | + if (flushpage_can_drop(inode, bh)) | |
1834 | + discard_buffer(bh); | |
1835 | + else | |
1836 | + ret = 0 ; | |
1837 | + } | |
1838 | + curr_off = next_off; | |
1839 | + bh = next; | |
1840 | + } while (bh != head); | |
1841 | + | |
1842 | + /* | |
1843 | + * subtle. We release buffer-heads only if this is | |
1844 | + * the 'final' flushpage. We have invalidated the get_block | |
1845 | + * cached value unconditionally, so real IO is not | |
1846 | + * possible anymore. | |
1847 | + * | |
1848 | + * If the free doesn't work out, the buffers can be | |
1849 | + * left around - they just turn into anonymous buffers | |
1850 | + * instead. | |
1851 | + */ | |
1852 | + if (!offset) { | |
1853 | + if (!ret || !try_to_free_buffers(page, 0)) | |
1854 | + return 0; | |
1855 | + if (page->buffers) | |
1856 | + BUG(); | |
1857 | + } | |
1858 | return ret ; | |
1859 | } | |
1860 | ||
1861 | @@ -2222,6 +2675,9 @@ | |
1862 | struct kiobuf *iobuf, unsigned long blocknr, | |
1863 | int blocksize) | |
1864 | { | |
1865 | + if (reiserfs_data_ordered(inode->i_sb) || reiserfs_file_data_log(inode)) { | |
1866 | + return -EINVAL; | |
1867 | + } | |
1868 | lock_kernel(); | |
1869 | reiserfs_commit_for_tail(inode); | |
1870 | unlock_kernel(); | |
1871 | @@ -2237,4 +2693,5 @@ | |
1872 | commit_write: reiserfs_commit_write, | |
1873 | bmap: reiserfs_aop_bmap, | |
1874 | direct_IO: reiserfs_direct_io, | |
1875 | + flushpage: reiserfs_flushpage, | |
1876 | } ; | |
052932c9 AM |
1877 | diff -urN linux-2.4.22.org/fs/reiserfs/ioctl.c linux-2.4.22/fs/reiserfs/ioctl.c |
1878 | --- linux-2.4.22.org/fs/reiserfs/ioctl.c 2003-11-21 15:08:29.000000000 +0100 | |
1879 | +++ linux-2.4.22/fs/reiserfs/ioctl.c 2003-11-21 15:14:23.000000000 +0100 | |
e57e653a JR |
1880 | @@ -25,12 +25,21 @@ |
1881 | switch (cmd) { | |
1882 | case REISERFS_IOC_UNPACK: | |
1883 | if( S_ISREG( inode -> i_mode ) ) { | |
1884 | - if (arg) | |
1885 | - return reiserfs_unpack (inode, filp); | |
1886 | - else | |
1887 | - return 0; | |
1888 | + if (arg) { | |
1889 | + int result; | |
1890 | + result = reiserfs_unpack (inode, filp); | |
1891 | + if (reiserfs_file_data_log(inode)) { | |
1892 | + struct reiserfs_transaction_handle th; | |
1893 | + lock_kernel(); | |
1894 | + journal_begin(&th, inode->i_sb, 1); | |
1895 | + SB_JOURNAL(inode->i_sb)->j_must_wait = 1; | |
1896 | + journal_end_sync(&th, inode->i_sb, 1); | |
1897 | + unlock_kernel(); | |
1898 | + } | |
1899 | + } else | |
1900 | + return 0; | |
1901 | } else | |
1902 | - return -ENOTTY; | |
1903 | + return -ENOTTY; | |
1904 | /* | |
1905 | * Following {G,S}ETFLAGS, and {G,S}ETVERSION are providing ext2 | |
1906 | * binary compatible interface (used by lsattr(1), and chattr(1)) and | |
1907 | @@ -97,6 +106,7 @@ | |
1908 | int retval = 0; | |
1909 | int index ; | |
1910 | struct page *page ; | |
1911 | + struct address_space *mapping ; | |
1912 | unsigned long write_from ; | |
1913 | unsigned long blocksize = inode->i_sb->s_blocksize ; | |
1914 | ||
1915 | @@ -127,19 +137,20 @@ | |
1916 | ** reiserfs_get_block to unpack the tail for us. | |
1917 | */ | |
1918 | index = inode->i_size >> PAGE_CACHE_SHIFT ; | |
1919 | - page = grab_cache_page(inode->i_mapping, index) ; | |
1920 | + mapping = inode->i_mapping ; | |
1921 | + page = grab_cache_page(mapping, index) ; | |
1922 | retval = -ENOMEM; | |
1923 | if (!page) { | |
1924 | goto out ; | |
1925 | } | |
1926 | - retval = reiserfs_prepare_write(NULL, page, write_from, blocksize) ; | |
1927 | + retval = mapping->a_ops->prepare_write(NULL, page, write_from, write_from) ; | |
1928 | if (retval) | |
1929 | goto out_unlock ; | |
1930 | ||
1931 | /* conversion can change page contents, must flush */ | |
1932 | flush_dcache_page(page) ; | |
1933 | inode->u.reiserfs_i.i_flags |= i_nopack_mask; | |
1934 | - kunmap(page) ; /* mapped by prepare_write */ | |
1935 | + retval = mapping->a_ops->commit_write(NULL, page, write_from, write_from) ; | |
1936 | ||
1937 | out_unlock: | |
1938 | UnlockPage(page) ; | |
052932c9 AM |
1939 | diff -urN linux-2.4.22.org/fs/reiserfs/journal.c linux-2.4.22/fs/reiserfs/journal.c |
1940 | --- linux-2.4.22.org/fs/reiserfs/journal.c 2003-11-21 15:08:29.000000000 +0100 | |
1941 | +++ linux-2.4.22/fs/reiserfs/journal.c 2003-11-21 15:14:23.000000000 +0100 | |
e57e653a JR |
1942 | @@ -33,17 +33,17 @@ |
1943 | ** -- Note, if you call this as an immediate flush from | |
1944 | ** from within kupdate, it will ignore the immediate flag | |
1945 | ** | |
1946 | -** The commit thread -- a writer process for async commits. It allows a | |
1947 | -** a process to request a log flush on a task queue. | |
1948 | -** the commit will happen once the commit thread wakes up. | |
1949 | -** The benefit here is the writer (with whatever | |
1950 | -** related locks it has) doesn't have to wait for the | |
1951 | -** log blocks to hit disk if it doesn't want to. | |
1952 | +** The commit thread -- a writer process for metadata and async commits. | |
1953 | +** this allows us to do less io with the journal lock | |
1954 | +** held. | |
1955 | */ | |
1956 | ||
1957 | +#define EXPORT_SYMTAB | |
1958 | +#include <linux/module.h> | |
1959 | #include <linux/config.h> | |
1960 | #include <asm/uaccess.h> | |
1961 | #include <asm/system.h> | |
1962 | +#include <linux/init.h> | |
1963 | ||
1964 | #include <linux/sched.h> | |
1965 | #include <asm/semaphore.h> | |
1966 | @@ -59,17 +59,25 @@ | |
1967 | #include <linux/string.h> | |
1968 | #include <linux/smp_lock.h> | |
1969 | ||
1970 | +/* gets a struct reiserfs_journal_list * from a list head */ | |
1971 | +#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ | |
1972 | + j_list)) | |
1973 | +#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ | |
1974 | + j_working_list)) | |
1975 | + | |
1976 | /* the number of mounted filesystems. This is used to decide when to | |
1977 | ** start and kill the commit thread | |
1978 | */ | |
1979 | static int reiserfs_mounted_fs_count = 0 ; | |
1980 | ||
1981 | -/* wake this up when you add something to the commit thread task queue */ | |
1982 | +static struct list_head kreiserfsd_supers = LIST_HEAD_INIT(kreiserfsd_supers); | |
1983 | + | |
1984 | +/* wake this up when you want help from the commit thread */ | |
1985 | DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_wait) ; | |
1986 | ||
1987 | -/* wait on this if you need to be sure you task queue entries have been run */ | |
1988 | +/* so we can wait for the commit thread to make progress */ | |
1989 | static DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_done) ; | |
1990 | -DECLARE_TASK_QUEUE(reiserfs_commit_thread_tq) ; | |
1991 | +DECLARE_MUTEX(kreiserfsd_sem) ; | |
1992 | ||
1993 | #define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit | |
1994 | structs at 4k */ | |
1995 | @@ -82,6 +90,9 @@ | |
1996 | ||
1997 | #define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ | |
1998 | ||
1999 | +/* journal list state bits */ | |
2000 | +#define LIST_TOUCHED 1 | |
2001 | + | |
2002 | /* flags for do_journal_end */ | |
2003 | #define FLUSH_ALL 1 /* flush commit and real blocks */ | |
2004 | #define COMMIT_NOW 2 /* end and commit this transaction */ | |
2005 | @@ -89,6 +100,9 @@ | |
2006 | ||
2007 | /* state bits for the journal */ | |
2008 | #define WRITERS_BLOCKED 1 /* set when new writers not allowed */ | |
2009 | +#define WRITERS_QUEUED 2 /* set when log is full due to too many | |
2010 | + * writers | |
2011 | + */ | |
2012 | ||
2013 | static int do_journal_end(struct reiserfs_transaction_handle *,struct super_block *,unsigned long nblocks,int flags) ; | |
2014 | static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ; | |
2015 | @@ -107,7 +121,7 @@ | |
2016 | ** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for | |
2017 | ** more details. | |
2018 | */ | |
2019 | -static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) { | |
2020 | +static inline int reiserfs_clean_and_file_buffer(struct buffer_head *bh) { | |
2021 | if (bh) { | |
2022 | clear_bit(BH_Dirty, &bh->b_state) ; | |
2023 | refile_buffer(bh) ; | |
2024 | @@ -473,6 +487,8 @@ | |
2025 | int pop_journal_writer(int index) { | |
2026 | #ifdef CONFIG_REISERFS_CHECK | |
2027 | if (index >= 0) { | |
2028 | + if (index >= 512) | |
2029 | + BUG(); | |
2030 | journal_writers[index] = NULL ; | |
2031 | } | |
2032 | #endif | |
2033 | @@ -522,6 +538,12 @@ | |
2034 | return 0 ; | |
2035 | } | |
2036 | ||
2037 | + /* when data logging is on, no special action is needed for the data | |
2038 | + * blocks | |
2039 | + */ | |
2040 | + if (reiserfs_data_log(p_s_sb)) | |
2041 | + search_all = 0; | |
2042 | + | |
2043 | PROC_INFO_INC( p_s_sb, journal.in_journal ); | |
2044 | /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. | |
2045 | ** if we crash before the transaction that freed it commits, this transaction won't | |
2046 | @@ -549,6 +571,7 @@ | |
2047 | ||
2048 | /* is it in the current transaction. This should never happen */ | |
2049 | if ((cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_hash_table, dev,bl,size))) { | |
2050 | + BUG(); | |
2051 | return 1; | |
2052 | } | |
2053 | ||
2054 | @@ -574,17 +597,12 @@ | |
2055 | /* lock the current transaction */ | |
2056 | inline static void lock_journal(struct super_block *p_s_sb) { | |
2057 | PROC_INFO_INC( p_s_sb, journal.lock_journal ); | |
2058 | - while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) { | |
2059 | - PROC_INFO_INC( p_s_sb, journal.lock_journal_wait ); | |
2060 | - sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ; | |
2061 | - } | |
2062 | - atomic_set(&(SB_JOURNAL(p_s_sb)->j_wlock), 1) ; | |
2063 | + down(&SB_JOURNAL(p_s_sb)->j_lock); | |
2064 | } | |
2065 | ||
2066 | /* unlock the current transaction */ | |
2067 | inline static void unlock_journal(struct super_block *p_s_sb) { | |
2068 | - atomic_dec(&(SB_JOURNAL(p_s_sb)->j_wlock)) ; | |
2069 | - wake_up(&(SB_JOURNAL(p_s_sb)->j_wait)) ; | |
2070 | + up(&SB_JOURNAL(p_s_sb)->j_lock); | |
2071 | } | |
2072 | ||
2073 | /* | |
2074 | @@ -602,6 +620,83 @@ | |
2075 | jl->j_list_bitmap = NULL ; | |
2076 | } | |
2077 | ||
2078 | +static int journal_list_still_alive(struct super_block *s, | |
2079 | + unsigned long trans_id) | |
2080 | +{ | |
2081 | + struct list_head *entry = &SB_JOURNAL(s)->j_journal_list; | |
2082 | + struct reiserfs_journal_list *jl; | |
2083 | + | |
2084 | + if (!list_empty(entry)) { | |
2085 | + jl = JOURNAL_LIST_ENTRY(entry->next); | |
2086 | + if (jl->j_trans_id <= trans_id) { | |
2087 | + return 1; | |
2088 | + } | |
2089 | + } | |
2090 | + return 0; | |
2091 | +} | |
2092 | + | |
2093 | +static int flush_older_commits(struct super_block *s, struct reiserfs_journal_list *jl) { | |
2094 | + struct reiserfs_journal_list *other_jl; | |
2095 | + struct reiserfs_journal_list *first_jl; | |
2096 | + struct list_head *entry; | |
2097 | + unsigned long trans_id = jl->j_trans_id; | |
2098 | + unsigned long other_trans_id; | |
2099 | + unsigned long first_trans_id; | |
2100 | + | |
2101 | +find_first: | |
2102 | + /* | |
2103 | + * first we walk backwards to find the oldest uncommitted transation | |
2104 | + */ | |
2105 | + first_jl = jl; | |
2106 | + entry = jl->j_list.prev; | |
2107 | + while(1) { | |
2108 | + other_jl = JOURNAL_LIST_ENTRY(entry); | |
2109 | + if (entry == &SB_JOURNAL(s)->j_journal_list || | |
2110 | + atomic_read(&other_jl->j_older_commits_done)) | |
2111 | + break; | |
2112 | + | |
2113 | + first_jl = other_jl; | |
2114 | + entry = other_jl->j_list.prev; | |
2115 | + } | |
2116 | + | |
2117 | + /* if we didn't find any older uncommitted transactions, return now */ | |
2118 | + if (first_jl == jl) { | |
2119 | + return 0; | |
2120 | + } | |
2121 | + | |
2122 | + first_trans_id = first_jl->j_trans_id; | |
2123 | + | |
2124 | + entry = &first_jl->j_list; | |
2125 | + while(1) { | |
2126 | + other_jl = JOURNAL_LIST_ENTRY(entry); | |
2127 | + other_trans_id = other_jl->j_trans_id; | |
2128 | + | |
2129 | + if (other_trans_id < trans_id) { | |
2130 | + if (atomic_read(&other_jl->j_commit_left) != 0) { | |
2131 | + flush_commit_list(s, other_jl, 0); | |
2132 | + | |
2133 | + /* list we were called with is gone, return */ | |
2134 | + if (!journal_list_still_alive(s, trans_id)) | |
2135 | + return 1; | |
2136 | + | |
2137 | + /* the one we just flushed is gone, this means all | |
2138 | + * older lists are also gone, so first_jl is no longer | |
2139 | + * valid either. Go back to the beginning. | |
2140 | + */ | |
2141 | + if (!journal_list_still_alive(s, other_trans_id)) { | |
2142 | + goto find_first; | |
2143 | + } | |
2144 | + } | |
2145 | + entry = entry->next; | |
2146 | + if (entry == &SB_JOURNAL(s)->j_journal_list) | |
2147 | + return 0; | |
2148 | + } else { | |
2149 | + return 0; | |
2150 | + } | |
2151 | + } | |
2152 | + return 0; | |
2153 | +} | |
2154 | + | |
2155 | /* | |
2156 | ** if this journal list still has commit blocks unflushed, send them to disk. | |
2157 | ** | |
2158 | @@ -611,16 +706,19 @@ | |
2159 | */ | |
2160 | static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) { | |
2161 | int i, count ; | |
2162 | - int index = 0 ; | |
2163 | int bn ; | |
2164 | int retry_count = 0 ; | |
2165 | int orig_commit_left = 0 ; | |
2166 | struct buffer_head *tbh = NULL ; | |
2167 | - struct reiserfs_journal_list *other_jl ; | |
2168 | + unsigned long trans_id = jl->j_trans_id; | |
2169 | ||
2170 | reiserfs_check_lock_depth("flush_commit_list") ; | |
2171 | ||
2172 | if (atomic_read(&jl->j_older_commits_done)) { | |
2173 | + if (!list_empty(&jl->j_ordered_bh_list)) | |
2174 | + BUG(); | |
2175 | + if (!list_empty(&jl->j_tail_bh_list)) | |
2176 | + BUG(); | |
2177 | return 0 ; | |
2178 | } | |
2179 | ||
2180 | @@ -628,50 +726,51 @@ | |
2181 | ** us is on disk too | |
2182 | */ | |
2183 | if (jl->j_len <= 0) { | |
2184 | + BUG(); | |
2185 | return 0 ; | |
2186 | } | |
2187 | + if (trans_id == SB_JOURNAL(s)->j_trans_id) | |
2188 | + BUG(); | |
2189 | + | |
2190 | if (flushall) { | |
2191 | - /* we _must_ make sure the transactions are committed in order. Start with the | |
2192 | - ** index after this one, wrap all the way around | |
2193 | - */ | |
2194 | - index = (jl - SB_JOURNAL_LIST(s)) + 1 ; | |
2195 | - for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { | |
2196 | - other_jl = SB_JOURNAL_LIST(s) + ( (index + i) % JOURNAL_LIST_COUNT) ; | |
2197 | - if (other_jl && other_jl != jl && other_jl->j_len > 0 && other_jl->j_trans_id > 0 && | |
2198 | - other_jl->j_trans_id <= jl->j_trans_id && (atomic_read(&(jl->j_older_commits_done)) == 0)) { | |
2199 | - flush_commit_list(s, other_jl, 0) ; | |
2200 | - } | |
2201 | + if (flush_older_commits(s, jl) == 1) { | |
2202 | + /* list disappeared during flush_older_commits. return */ | |
2203 | + return 0; | |
2204 | } | |
2205 | } | |
2206 | ||
2207 | count = 0 ; | |
2208 | - /* don't flush the commit list for the current transactoin */ | |
2209 | - if (jl == ((SB_JOURNAL_LIST(s) + SB_JOURNAL_LIST_INDEX(s)))) { | |
2210 | - return 0 ; | |
2211 | - } | |
2212 | ||
2213 | /* make sure nobody is trying to flush this one at the same time */ | |
2214 | - if (atomic_read(&(jl->j_commit_flushing))) { | |
2215 | - sleep_on(&(jl->j_commit_wait)) ; | |
2216 | - if (flushall) { | |
2217 | - atomic_set(&(jl->j_older_commits_done), 1) ; | |
2218 | - } | |
2219 | - return 0 ; | |
2220 | + down(&jl->j_commit_lock); | |
2221 | + if (!journal_list_still_alive(s, trans_id)) { | |
2222 | + up(&jl->j_commit_lock); | |
2223 | + return 0; | |
2224 | } | |
2225 | + if (jl->j_trans_id == 0) | |
2226 | + BUG(); | |
2227 | ||
2228 | /* this commit is done, exit */ | |
2229 | if (atomic_read(&(jl->j_commit_left)) <= 0) { | |
2230 | if (flushall) { | |
2231 | atomic_set(&(jl->j_older_commits_done), 1) ; | |
2232 | } | |
2233 | + if (!list_empty(&jl->j_ordered_bh_list)) | |
2234 | + BUG(); | |
2235 | + if (!list_empty(&jl->j_tail_bh_list)) | |
2236 | + BUG(); | |
2237 | + up(&jl->j_commit_lock); | |
2238 | return 0 ; | |
2239 | } | |
2240 | - /* keeps others from flushing while we are flushing */ | |
2241 | - atomic_set(&(jl->j_commit_flushing), 1) ; | |
2242 | - | |
2243 | ||
2244 | + /* write any buffers that must hit disk before the commit is done */ | |
2245 | + while(!list_empty(&jl->j_ordered_bh_list)) { | |
2246 | + unlock_kernel(); | |
2247 | + fsync_buffers_list(&jl->j_ordered_bh_list); | |
2248 | + lock_kernel(); | |
2249 | + } | |
2250 | if (jl->j_len > SB_JOURNAL_TRANS_MAX(s)) { | |
2251 | - reiserfs_panic(s, "journal-512: flush_commit_list: length is %lu, list number %d\n", jl->j_len, jl - SB_JOURNAL_LIST(s)) ; | |
2252 | + reiserfs_panic(s, "journal-512: flush_commit_list: length is %lu, trans_id %lu\n", jl->j_len, jl->j_trans_id) ; | |
2253 | return 0 ; | |
2254 | } | |
2255 | ||
2256 | @@ -701,7 +800,7 @@ | |
2257 | if (buffer_dirty(tbh)) { | |
2258 | reiserfs_warning(s, "journal-569: flush_commit_list, block already dirty!\n") ; | |
2259 | } else { | |
2260 | - mark_buffer_dirty(tbh) ; | |
2261 | + atomic_set_buffer_dirty(tbh); | |
2262 | } | |
2263 | ll_rw_block(WRITE, 1, &tbh) ; | |
2264 | count++ ; | |
2265 | @@ -745,16 +844,22 @@ | |
2266 | atomic_dec(&(jl->j_commit_left)) ; | |
2267 | bforget(jl->j_commit_bh) ; | |
2268 | ||
2269 | + if (SB_JOURNAL(s)->j_last_commit_id != 0 && | |
2270 | + (jl->j_trans_id - SB_JOURNAL(s)->j_last_commit_id) != 1) { | |
2271 | + reiserfs_warning(s, "clm-2200: dev %s, last commit %lu, current %lu\n", | |
2272 | + kdevname(s->s_dev), SB_JOURNAL(s)->j_last_commit_id, | |
2273 | + SB_JOURNAL(s)->j_last_commit_id); | |
2274 | + } | |
2275 | + SB_JOURNAL(s)->j_last_commit_id = jl->j_trans_id; | |
2276 | + | |
2277 | /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ | |
2278 | cleanup_freed_for_journal_list(s, jl) ; | |
2279 | ||
2280 | if (flushall) { | |
2281 | atomic_set(&(jl->j_older_commits_done), 1) ; | |
2282 | } | |
2283 | - atomic_set(&(jl->j_commit_flushing), 0) ; | |
2284 | - wake_up(&(jl->j_commit_wait)) ; | |
2285 | + up(&jl->j_commit_lock); | |
2286 | ||
2287 | - s->s_dirt = 1 ; | |
2288 | return 0 ; | |
2289 | } | |
2290 | ||
2291 | @@ -853,22 +958,27 @@ | |
2292 | ** flush any and all journal lists older than you are | |
2293 | ** can only be called from flush_journal_list | |
2294 | */ | |
2295 | -static int flush_older_journal_lists(struct super_block *p_s_sb, struct reiserfs_journal_list *jl, unsigned long trans_id) { | |
2296 | - int i, index ; | |
2297 | - struct reiserfs_journal_list *other_jl ; | |
2298 | - | |
2299 | - index = jl - SB_JOURNAL_LIST(p_s_sb) ; | |
2300 | - for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { | |
2301 | - other_jl = SB_JOURNAL_LIST(p_s_sb) + ((index + i) % JOURNAL_LIST_COUNT) ; | |
2302 | - if (other_jl && other_jl->j_len > 0 && | |
2303 | - other_jl->j_trans_id > 0 && | |
2304 | - other_jl->j_trans_id < trans_id && | |
2305 | - other_jl != jl) { | |
2306 | - /* do not flush all */ | |
2307 | - flush_journal_list(p_s_sb, other_jl, 0) ; | |
2308 | +static int flush_older_journal_lists(struct super_block *p_s_sb, | |
2309 | + struct reiserfs_journal_list *jl) | |
2310 | +{ | |
2311 | + struct list_head *entry; | |
2312 | + struct reiserfs_journal_list *other_jl ; | |
2313 | + unsigned long trans_id = jl->j_trans_id; | |
2314 | + | |
2315 | + /* we know we are the only ones flushing things, no extra race | |
2316 | + * protection is required. | |
2317 | + */ | |
2318 | +restart: | |
2319 | + entry = SB_JOURNAL(p_s_sb)->j_journal_list.next; | |
2320 | + other_jl = JOURNAL_LIST_ENTRY(entry); | |
2321 | + if (other_jl->j_trans_id < trans_id) { | |
2322 | + /* do not flush all */ | |
2323 | + flush_journal_list(p_s_sb, other_jl, 0) ; | |
2324 | + | |
2325 | + /* other_jl is now deleted from the list */ | |
2326 | + goto restart; | |
2327 | } | |
2328 | - } | |
2329 | - return 0 ; | |
2330 | + return 0 ; | |
2331 | } | |
2332 | ||
2333 | static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) { | |
2334 | @@ -881,14 +991,23 @@ | |
2335 | put_bh(bh) ; | |
2336 | } | |
2337 | static void submit_logged_buffer(struct buffer_head *bh) { | |
2338 | - lock_buffer(bh) ; | |
2339 | get_bh(bh) ; | |
2340 | bh->b_end_io = reiserfs_end_buffer_io_sync ; | |
2341 | mark_buffer_notjournal_new(bh) ; | |
2342 | clear_bit(BH_Dirty, &bh->b_state) ; | |
2343 | + if (!buffer_uptodate(bh)) | |
2344 | + BUG(); | |
2345 | submit_bh(WRITE, bh) ; | |
2346 | } | |
2347 | ||
2348 | +static void del_from_work_list(struct super_block *s, | |
2349 | + struct reiserfs_journal_list *jl) { | |
2350 | + if (!list_empty(&jl->j_working_list)) { | |
2351 | + list_del_init(&jl->j_working_list); | |
2352 | + SB_JOURNAL(s)->j_num_work_lists--; | |
2353 | + } | |
2354 | +} | |
2355 | + | |
2356 | /* flush a journal list, both commit and real blocks | |
2357 | ** | |
2358 | ** always set flushall to 1, unless you are calling from inside | |
2359 | @@ -909,29 +1028,27 @@ | |
2360 | unsigned long j_len_saved = jl->j_len ; | |
2361 | ||
2362 | if (j_len_saved <= 0) { | |
2363 | - return 0 ; | |
2364 | + BUG(); | |
2365 | } | |
2366 | ||
2367 | if (atomic_read(&SB_JOURNAL(s)->j_wcount) != 0) { | |
2368 | reiserfs_warning(s, "clm-2048: flush_journal_list called with wcount %d\n", | |
2369 | atomic_read(&SB_JOURNAL(s)->j_wcount)) ; | |
2370 | } | |
2371 | - /* if someone is getting the commit list, we must wait for them */ | |
2372 | - while (atomic_read(&(jl->j_commit_flushing))) { | |
2373 | - sleep_on(&(jl->j_commit_wait)) ; | |
2374 | - } | |
2375 | - /* if someone is flushing this list, we must wait for them */ | |
2376 | - while (atomic_read(&(jl->j_flushing))) { | |
2377 | - sleep_on(&(jl->j_flush_wait)) ; | |
2378 | - } | |
2379 | ||
2380 | - /* this list is now ours, we can change anything we want */ | |
2381 | - atomic_set(&(jl->j_flushing), 1) ; | |
2382 | + if (jl->j_trans_id == 0) | |
2383 | + BUG(); | |
2384 | + | |
2385 | + /* if flushall == 0, the lock is already held */ | |
2386 | + if (flushall) { | |
2387 | + down(&SB_JOURNAL(s)->j_flush_sem); | |
2388 | + } else if (!down_trylock(&SB_JOURNAL(s)->j_flush_sem)) { | |
2389 | + BUG(); | |
2390 | + } | |
2391 | ||
2392 | count = 0 ; | |
2393 | if (j_len_saved > SB_JOURNAL_TRANS_MAX(s)) { | |
2394 | - reiserfs_panic(s, "journal-715: flush_journal_list, length is %lu, list number %d\n", j_len_saved, jl - SB_JOURNAL_LIST(s)) ; | |
2395 | - atomic_dec(&(jl->j_flushing)) ; | |
2396 | + reiserfs_panic(s, "journal-715: flush_journal_list, length is %lu, transid %lu\n", j_len_saved, jl->j_trans_id) ; | |
2397 | return 0 ; | |
2398 | } | |
2399 | ||
2400 | @@ -981,13 +1098,13 @@ | |
2401 | get_bh(saved_bh) ; | |
2402 | ||
2403 | if (buffer_journal_dirty(saved_bh)) { | |
2404 | + if (!can_dirty(cn)) | |
2405 | + BUG(); | |
2406 | was_jwait = 1 ; | |
2407 | - mark_buffer_notjournal_dirty(saved_bh) ; | |
2408 | - /* undo the inc from journal_mark_dirty */ | |
2409 | - put_bh(saved_bh) ; | |
2410 | - } | |
2411 | - if (can_dirty(cn)) { | |
2412 | - was_dirty = 1 ; | |
2413 | + was_dirty = 1; | |
2414 | + } else if (can_dirty(cn)) { | |
2415 | + /* everything with !pjl && jwait should be writable */ | |
2416 | + BUG(); | |
2417 | } | |
2418 | } | |
2419 | ||
2420 | @@ -995,7 +1112,8 @@ | |
2421 | ** sure they are commited, and don't try writing it to disk | |
2422 | */ | |
2423 | if (pjl) { | |
2424 | - flush_commit_list(s, pjl, 1) ; | |
2425 | + if (atomic_read(&pjl->j_commit_left)) | |
2426 | + flush_commit_list(s, pjl, 1) ; | |
2427 | goto free_cnode ; | |
2428 | } | |
2429 | ||
2430 | @@ -1029,7 +1147,12 @@ | |
2431 | /* we inc again because saved_bh gets decremented at free_cnode */ | |
2432 | get_bh(saved_bh) ; | |
2433 | set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ; | |
2434 | + lock_buffer(saved_bh); | |
2435 | submit_logged_buffer(saved_bh) ; | |
2436 | + if (cn->blocknr != saved_bh->b_blocknr) { | |
2437 | +printk("cn %lu does not match bh %lu\n", cn->blocknr, saved_bh->b_blocknr); | |
2438 | + BUG(); | |
2439 | + } | |
2440 | count++ ; | |
2441 | } else { | |
2442 | reiserfs_warning(s, "clm-2082: Unable to flush buffer %lu in flush_journal_list\n", | |
2443 | @@ -1057,9 +1180,23 @@ | |
2444 | if (!cn->bh) { | |
2445 | reiserfs_panic(s, "journal-1012: cn->bh is NULL\n") ; | |
2446 | } | |
2447 | + if (cn->blocknr != cn->bh->b_blocknr) { | |
2448 | +printk("2cn %lu does not match bh %lu\n", cn->blocknr, cn->bh->b_blocknr); | |
2449 | + BUG(); | |
2450 | + } | |
2451 | if (!buffer_uptodate(cn->bh)) { | |
2452 | - reiserfs_panic(s, "journal-949: buffer write failed\n") ; | |
2453 | + reiserfs_panic(s, "journal-949: buffer %lu write failed\n", cn->bh->b_blocknr) ; | |
2454 | } | |
2455 | + | |
2456 | + /* note, we must clear the JDirty_wait bit after the up to date | |
2457 | + ** check, otherwise we race against our flushpage routine | |
2458 | + */ | |
2459 | + if (!test_and_clear_bit(BH_JDirty_wait, &cn->bh->b_state)) | |
2460 | + BUG(); | |
2461 | + | |
2462 | + /* undo the inc from journal_mark_dirty */ | |
2463 | + put_bh(cn->bh) ; | |
2464 | + | |
2465 | refile_buffer(cn->bh) ; | |
2466 | brelse(cn->bh) ; | |
2467 | } | |
2468 | @@ -1074,7 +1211,7 @@ | |
2469 | ** replayed after a crash | |
2470 | */ | |
2471 | if (flushall) { | |
2472 | - flush_older_journal_lists(s, jl, jl->j_trans_id) ; | |
2473 | + flush_older_journal_lists(s, jl); | |
2474 | } | |
2475 | ||
2476 | /* before we can remove everything from the hash tables for this | |
2477 | @@ -1089,46 +1226,137 @@ | |
2478 | update_journal_header_block(s, (jl->j_start + jl->j_len + 2) % SB_ONDISK_JOURNAL_SIZE(s), jl->j_trans_id) ; | |
2479 | } | |
2480 | remove_all_from_journal_list(s, jl, 0) ; | |
2481 | + list_del(&jl->j_list); | |
2482 | + SB_JOURNAL(s)->j_num_lists--; | |
2483 | + del_from_work_list(s, jl); | |
2484 | + | |
2485 | + if (SB_JOURNAL(s)->j_last_flush_id != 0 && | |
2486 | + (jl->j_trans_id - SB_JOURNAL(s)->j_last_flush_id) != 1) { | |
2487 | + reiserfs_warning(s, "clm-2201: dev %s, last flush %lu, current %lu\n", | |
2488 | + kdevname(s->s_dev), SB_JOURNAL(s)->j_last_flush_id, | |
2489 | + SB_JOURNAL(s)->j_last_flush_id); | |
2490 | + } | |
2491 | + SB_JOURNAL(s)->j_last_flush_id = jl->j_trans_id; | |
2492 | + | |
2493 | + /* not strictly required since we are freeing the list, but it should | |
2494 | + * help find code using dead lists later on | |
2495 | + */ | |
2496 | jl->j_len = 0 ; | |
2497 | atomic_set(&(jl->j_nonzerolen), 0) ; | |
2498 | jl->j_start = 0 ; | |
2499 | jl->j_realblock = NULL ; | |
2500 | jl->j_commit_bh = NULL ; | |
2501 | jl->j_trans_id = 0 ; | |
2502 | - atomic_dec(&(jl->j_flushing)) ; | |
2503 | - wake_up(&(jl->j_flush_wait)) ; | |
2504 | + jl->j_state = 0; | |
2505 | + | |
2506 | + if (!list_empty(&jl->j_ordered_bh_list)) | |
2507 | + BUG(); | |
2508 | + | |
2509 | + if (!list_empty(&jl->j_tail_bh_list)) | |
2510 | + BUG(); | |
2511 | + | |
2512 | + // kmem_cache_free(journal_list_cachep, jl); | |
2513 | + reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s); | |
2514 | + | |
2515 | + if (flushall) | |
2516 | + up(&SB_JOURNAL(s)->j_flush_sem); | |
2517 | return 0 ; | |
2518 | } | |
2519 | ||
2520 | ||
2521 | -static int kupdate_one_transaction(struct super_block *s, | |
2522 | +#define CHUNK_SIZE 32 | |
2523 | +struct buffer_chunk { | |
2524 | + struct buffer_head *bh[CHUNK_SIZE]; | |
2525 | + int nr; | |
2526 | +}; | |
2527 | + | |
2528 | +static void write_chunk(struct buffer_chunk *chunk) { | |
2529 | + int i; | |
2530 | + for (i = 0; i < chunk->nr ; i++) { | |
2531 | + submit_logged_buffer(chunk->bh[i]) ; | |
2532 | + } | |
2533 | + chunk->nr = 0; | |
2534 | +} | |
2535 | + | |
2536 | +static void add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh) { | |
2537 | + if (chunk->nr >= CHUNK_SIZE) | |
2538 | + BUG(); | |
2539 | + chunk->bh[chunk->nr++] = bh; | |
2540 | + if (chunk->nr >= CHUNK_SIZE) | |
2541 | + write_chunk(chunk); | |
2542 | +} | |
2543 | + | |
2544 | +static int write_one_transaction(struct super_block *s, | |
2545 | + struct reiserfs_journal_list *jl, | |
2546 | + struct buffer_chunk *chunk) | |
2547 | +{ | |
2548 | + struct reiserfs_journal_list *pjl ; /* previous list for this cn */ | |
2549 | + struct reiserfs_journal_cnode *cn; | |
2550 | + int ret = 0 ; | |
2551 | + | |
2552 | + jl->j_state |= LIST_TOUCHED; | |
2553 | + if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) { | |
2554 | + del_from_work_list(s, jl); | |
2555 | + return 0; | |
2556 | + } | |
2557 | + del_from_work_list(s, jl); | |
2558 | + | |
2559 | + cn = jl->j_realblock ; | |
2560 | + while(cn) { | |
2561 | + /* if the blocknr == 0, this has been cleared from the hash, | |
2562 | + ** skip it | |
2563 | + */ | |
2564 | + if (cn->blocknr == 0) { | |
2565 | + goto next ; | |
2566 | + } | |
2567 | + /* look for a more recent transaction that logged this | |
2568 | + ** buffer. Only the most recent transaction with a buffer in | |
2569 | + ** it is allowed to send that buffer to disk | |
2570 | + */ | |
2571 | + pjl = find_newer_jl_for_cn(cn) ; | |
2572 | + if (!pjl && cn->bh && buffer_journal_dirty(cn->bh) && can_dirty(cn)) { | |
2573 | + if (!test_bit(BH_JPrepared, &cn->bh->b_state)) { | |
2574 | + struct buffer_head *tmp_bh; | |
2575 | + /* we can race against journal_mark_freed when we try | |
2576 | + * to lock_buffer(cn->bh), so we have to inc the buffer | |
2577 | + * count, and recheck things after locking | |
2578 | + */ | |
2579 | + tmp_bh = cn->bh; | |
2580 | + get_bh(tmp_bh); | |
2581 | + set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ; | |
2582 | + lock_buffer(tmp_bh); | |
2583 | + if (cn->bh && buffer_journal_dirty(tmp_bh) && | |
2584 | + !test_bit(BH_JPrepared, &tmp_bh->b_state)) | |
2585 | + { | |
2586 | + add_to_chunk(chunk, tmp_bh); | |
2587 | + ret++; | |
2588 | + } else { | |
2589 | + /* note, cn->bh might be null now */ | |
2590 | + unlock_buffer(tmp_bh); | |
2591 | + } | |
2592 | + put_bh(tmp_bh); | |
2593 | + } | |
2594 | + } | |
2595 | +next: | |
2596 | + cn = cn->next ; | |
2597 | + if (current->need_resched) | |
2598 | + schedule(); | |
2599 | + } | |
2600 | + return ret ; | |
2601 | +} | |
2602 | + | |
2603 | +static int wait_one_transaction(struct super_block *s, | |
2604 | struct reiserfs_journal_list *jl) | |
2605 | { | |
2606 | struct reiserfs_journal_list *pjl ; /* previous list for this cn */ | |
2607 | struct reiserfs_journal_cnode *cn, *walk_cn ; | |
2608 | unsigned long blocknr ; | |
2609 | - int run = 0 ; | |
2610 | - int orig_trans_id = jl->j_trans_id ; | |
2611 | struct buffer_head *saved_bh ; | |
2612 | int ret = 0 ; | |
2613 | ||
2614 | - /* if someone is getting the commit list, we must wait for them */ | |
2615 | - while (atomic_read(&(jl->j_commit_flushing))) { | |
2616 | - sleep_on(&(jl->j_commit_wait)) ; | |
2617 | - } | |
2618 | - /* if someone is flushing this list, we must wait for them */ | |
2619 | - while (atomic_read(&(jl->j_flushing))) { | |
2620 | - sleep_on(&(jl->j_flush_wait)) ; | |
2621 | - } | |
2622 | - /* was it flushed while we slept? */ | |
2623 | - if (jl->j_len <= 0 || jl->j_trans_id != orig_trans_id) { | |
2624 | - return 0 ; | |
2625 | + if (atomic_read(&jl->j_commit_left) != 0 || jl->j_len <= 0) { | |
2626 | + BUG(); | |
2627 | } | |
2628 | - | |
2629 | - /* this list is now ours, we can change anything we want */ | |
2630 | - atomic_set(&(jl->j_flushing), 1) ; | |
2631 | - | |
2632 | -loop_start: | |
2633 | cn = jl->j_realblock ; | |
2634 | while(cn) { | |
2635 | saved_bh = NULL ; | |
2636 | @@ -1143,27 +1371,14 @@ | |
2637 | ** it is allowed to send that buffer to disk | |
2638 | */ | |
2639 | pjl = find_newer_jl_for_cn(cn) ; | |
2640 | - if (run == 0 && !pjl && cn->bh && buffer_journal_dirty(cn->bh) && | |
2641 | - can_dirty(cn)) | |
2642 | - { | |
2643 | - if (!test_bit(BH_JPrepared, &cn->bh->b_state)) { | |
2644 | - set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ; | |
2645 | - submit_logged_buffer(cn->bh) ; | |
2646 | - } else { | |
2647 | - /* someone else is using this buffer. We can't | |
2648 | - ** send it to disk right now because they might | |
2649 | - ** be changing/logging it. | |
2650 | - */ | |
2651 | - ret = 1 ; | |
2652 | - } | |
2653 | - } else if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { | |
2654 | + if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { | |
2655 | clear_bit(BLOCK_NEEDS_FLUSH, &cn->state) ; | |
2656 | if (!pjl && cn->bh) { | |
2657 | wait_on_buffer(cn->bh) ; | |
2658 | - } | |
2659 | - /* check again, someone could have logged while we scheduled */ | |
2660 | - pjl = find_newer_jl_for_cn(cn) ; | |
2661 | + /* check again, someone could have logged while we scheduled */ | |
2662 | + pjl = find_newer_jl_for_cn(cn) ; | |
2663 | ||
2664 | + } | |
2665 | /* before the JDirty_wait bit is set, the | |
2666 | ** buffer is added to the hash list. So, if we are | |
2667 | ** run in the middle of a do_journal_end, we will notice | |
2668 | @@ -1210,60 +1425,182 @@ | |
2669 | } | |
2670 | next: | |
2671 | cn = cn->next ; | |
2672 | + if (current->need_resched) | |
2673 | + schedule(); | |
2674 | } | |
2675 | - /* the first run through the loop sends all the dirty buffers to | |
2676 | - ** ll_rw_block. | |
2677 | - ** the second run through the loop does all the accounting | |
2678 | - */ | |
2679 | - if (run++ == 0) { | |
2680 | - goto loop_start ; | |
2681 | + return ret ; | |
2682 | +} | |
2683 | + | |
2684 | +static int kupdate_transactions(struct super_block *s, | |
2685 | + struct reiserfs_journal_list *jl, | |
2686 | + struct reiserfs_journal_list **next_jl, | |
2687 | + unsigned long *next_trans_id, | |
2688 | + int num_blocks, | |
2689 | + int num_trans) { | |
2690 | + int ret = 0; | |
2691 | + int written = 0 ; | |
2692 | + int transactions_flushed = 0; | |
2693 | + unsigned long orig_trans_id = jl->j_trans_id; | |
2694 | + struct reiserfs_journal_list *orig_jl = jl; | |
2695 | + struct buffer_chunk chunk; | |
2696 | + struct list_head *entry; | |
2697 | + chunk.nr = 0; | |
2698 | + | |
2699 | + down(&SB_JOURNAL(s)->j_flush_sem); | |
2700 | + if (!journal_list_still_alive(s, orig_trans_id)) { | |
2701 | + goto done; | |
2702 | } | |
2703 | ||
2704 | - atomic_set(&(jl->j_flushing), 0) ; | |
2705 | - wake_up(&(jl->j_flush_wait)) ; | |
2706 | - return ret ; | |
2707 | + /* we've got j_flush_sem held, nobody is going to delete any | |
2708 | + * of these lists out from underneath us | |
2709 | + */ | |
2710 | + while((num_trans && transactions_flushed < num_trans) || | |
2711 | + (!num_trans && written < num_blocks)) { | |
2712 | + | |
2713 | + if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) || | |
2714 | + atomic_read(&jl->j_commit_left)) | |
2715 | + { | |
2716 | + del_from_work_list(s, jl); | |
2717 | + break; | |
2718 | + } | |
2719 | + ret = write_one_transaction(s, jl, &chunk); | |
2720 | + | |
2721 | + if (ret < 0) | |
2722 | + goto done; | |
2723 | + transactions_flushed++; | |
2724 | + written += ret; | |
2725 | + entry = jl->j_list.next; | |
2726 | + | |
2727 | + /* did we wrap? */ | |
2728 | + if (entry == &SB_JOURNAL(s)->j_journal_list) { | |
2729 | + break; | |
2730 | + } | |
2731 | + jl = JOURNAL_LIST_ENTRY(entry); | |
2732 | + | |
2733 | + /* don't bother with older transactions */ | |
2734 | + if (jl->j_trans_id <= orig_trans_id) | |
2735 | + break; | |
2736 | + } | |
2737 | + if (chunk.nr) { | |
2738 | + write_chunk(&chunk); | |
2739 | + } | |
2740 | + | |
2741 | + jl = orig_jl; | |
2742 | + *next_jl = jl; | |
2743 | + *next_trans_id = jl->j_trans_id; | |
2744 | + ret = transactions_flushed; | |
2745 | + while(transactions_flushed--) { | |
2746 | + | |
2747 | + wait_one_transaction(s, jl); | |
2748 | + entry = jl->j_list.next; | |
2749 | + jl = JOURNAL_LIST_ENTRY(entry); | |
2750 | + | |
2751 | + /* make sure we can really count */ | |
2752 | + if (jl->j_trans_id <= orig_trans_id && transactions_flushed > 0) { | |
2753 | +printk("flushing %s %lu, orig_trans_id was %lu\n", kdevname(s->s_dev), jl->j_trans_id, orig_trans_id); | |
2754 | + BUG(); | |
2755 | + } | |
2756 | + *next_jl = jl; | |
2757 | + *next_trans_id = jl->j_trans_id; | |
2758 | + } | |
2759 | + | |
2760 | +done: | |
2761 | + up(&SB_JOURNAL(s)->j_flush_sem); | |
2762 | + return ret; | |
2763 | } | |
2764 | + | |
2765 | +/* for o_sync and fsync heavy applications, they tend to use | |
2766 | +** all the journal list slots with tiny transactions. These | |
2767 | +** trigger lots and lots of calls to update the header block, which | |
2768 | +** adds seeks and slows things down. | |
2769 | +** | |
2770 | +** This function tries to clear out a large chunk of the journal lists | |
2771 | +** at once, which makes everything faster since only the newest journal | |
2772 | +** list updates the header block | |
2773 | +*/ | |
2774 | +static int flush_used_journal_lists(struct super_block *s, | |
2775 | + struct reiserfs_journal_list *jl) { | |
2776 | + unsigned long len = 0; | |
2777 | + unsigned long cur_len; | |
2778 | + int ret; | |
2779 | + int i; | |
2780 | + struct reiserfs_journal_list *tjl; | |
2781 | + struct reiserfs_journal_list *flush_jl; | |
2782 | + unsigned long trans_id; | |
2783 | + | |
2784 | + flush_jl = tjl = jl; | |
2785 | + | |
2786 | + /* flush for 256 transactions or 256 blocks, whichever comes first */ | |
2787 | + for(i = 0 ; i < 256 && len < 256 ; i++) { | |
2788 | + if (atomic_read(&tjl->j_commit_left) || | |
2789 | + tjl->j_trans_id < jl->j_trans_id) { | |
2790 | + break; | |
2791 | + } | |
2792 | + cur_len = atomic_read(&tjl->j_nonzerolen); | |
2793 | + if (cur_len > 0) { | |
2794 | + tjl->j_state &= ~LIST_TOUCHED; | |
2795 | + } | |
2796 | + len += cur_len; | |
2797 | + flush_jl = tjl; | |
2798 | + if (tjl->j_list.next == &SB_JOURNAL(s)->j_journal_list) | |
2799 | + break; | |
2800 | + tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); | |
2801 | + } | |
2802 | + /* try to find a group of blocks we can flush across all the | |
2803 | + ** transactions, but only bother if we've actually spanned | |
2804 | + ** across multiple lists | |
2805 | + */ | |
2806 | + if (flush_jl != jl) { | |
2807 | + ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); | |
2808 | + } | |
2809 | + flush_journal_list(s, flush_jl, 1) ; | |
2810 | + return 0; | |
2811 | +} | |
2812 | + | |
2813 | + | |
2814 | /* since we never give dirty buffers to bdflush/kupdate, we have to | |
2815 | ** flush them ourselves. This runs through the journal lists, finds | |
2816 | ** old metadata in need of flushing and sends it to disk. | |
2817 | ** this does not end transactions, commit anything, or free | |
2818 | ** cnodes. | |
2819 | -** | |
2820 | -** returns the highest transaction id that was flushed last time | |
2821 | */ | |
2822 | static unsigned long reiserfs_journal_kupdate(struct super_block *s) { | |
2823 | - struct reiserfs_journal_list *jl ; | |
2824 | - int i ; | |
2825 | - int start ; | |
2826 | + struct reiserfs_journal_list *jl, *next_jl; | |
2827 | + unsigned long trans_id, next_trans_id; | |
2828 | time_t age ; | |
2829 | - int ret = 0 ; | |
2830 | ||
2831 | - start = SB_JOURNAL_LIST_INDEX(s) ; | |
2832 | + jl = JOURNAL_WORK_ENTRY(SB_JOURNAL(s)->j_working_list.next); | |
2833 | ||
2834 | - /* safety check to prevent flush attempts during a mount */ | |
2835 | - if (start < 0) { | |
2836 | +restart: | |
2837 | + /* kupdate transactions might not set next_trans_id, it must be | |
2838 | + * initialized before each call | |
2839 | + */ | |
2840 | + next_trans_id = 0; | |
2841 | + if (list_empty(&SB_JOURNAL(s)->j_working_list)) { | |
2842 | return 0 ; | |
2843 | } | |
2844 | - i = (start + 1) % JOURNAL_LIST_COUNT ; | |
2845 | - while(i != start) { | |
2846 | - jl = SB_JOURNAL_LIST(s) + i ; | |
2847 | - age = CURRENT_TIME - jl->j_timestamp ; | |
2848 | - if (jl->j_len > 0 && // age >= (JOURNAL_MAX_COMMIT_AGE * 2) && | |
2849 | - atomic_read(&(jl->j_nonzerolen)) > 0 && | |
2850 | - atomic_read(&(jl->j_commit_left)) == 0) { | |
2851 | + trans_id = jl->j_trans_id; | |
2852 | ||
2853 | - if (jl->j_trans_id == SB_JOURNAL(s)->j_trans_id) { | |
2854 | - break ; | |
2855 | - } | |
2856 | - /* if ret was already 1, we want to preserve that */ | |
2857 | - ret |= kupdate_one_transaction(s, jl) ; | |
2858 | - } | |
2859 | - if (atomic_read(&(jl->j_nonzerolen)) > 0) { | |
2860 | - ret |= 1 ; | |
2861 | - } | |
2862 | - i = (i + 1) % JOURNAL_LIST_COUNT ; | |
2863 | + /* check for race with the code that frees lists */ | |
2864 | + if (jl->j_trans_id == 0) | |
2865 | + BUG(); | |
2866 | + age = CURRENT_TIME - jl->j_timestamp ; | |
2867 | + if (age >= SB_JOURNAL_MAX_COMMIT_AGE(s) && | |
2868 | + atomic_read(&jl->j_nonzerolen) > 0 && | |
2869 | + atomic_read(&jl->j_commit_left) == 0) | |
2870 | + { | |
2871 | + if (kupdate_transactions(s, jl, &next_jl, &next_trans_id, 32, 32) < 0) | |
2872 | + return 0; | |
2873 | + if (next_jl != JOURNAL_WORK_ENTRY(&SB_JOURNAL(s)->j_working_list) && | |
2874 | + next_trans_id > trans_id) | |
2875 | + { | |
2876 | + if (journal_list_still_alive(s, next_trans_id)) { | |
2877 | + jl = next_jl; | |
2878 | + goto restart; | |
2879 | + } | |
2880 | + } | |
2881 | } | |
2882 | - return ret ; | |
2883 | + return 0; | |
2884 | } | |
2885 | ||
2886 | /* | |
2887 | @@ -1307,6 +1644,12 @@ | |
2888 | } | |
2889 | ||
2890 | static void free_journal_ram(struct super_block *p_s_sb) { | |
2891 | + | |
2892 | + // kmem_cache_free(journal_list_cachep, SB_JOURNAL(p_s_sb)->j_current_jl); | |
2893 | + reiserfs_kfree(SB_JOURNAL(p_s_sb)->j_current_jl, | |
2894 | + sizeof(struct reiserfs_journal_list), p_s_sb); | |
2895 | + SB_JOURNAL(p_s_sb)->j_num_lists--; | |
2896 | + | |
2897 | vfree(SB_JOURNAL(p_s_sb)->j_cnode_free_orig) ; | |
2898 | free_list_bitmaps(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_bitmap) ; | |
2899 | free_bitmap_nodes(p_s_sb) ; /* must be after free_list_bitmaps */ | |
2900 | @@ -1327,6 +1670,10 @@ | |
2901 | static int do_journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, int error) { | |
2902 | struct reiserfs_transaction_handle myth ; | |
2903 | ||
2904 | + down(&kreiserfsd_sem); | |
2905 | + list_del(&p_s_sb->u.reiserfs_sb.s_reiserfs_supers); | |
2906 | + up(&kreiserfsd_sem); | |
2907 | + | |
2908 | /* we only want to flush out transactions if we were called with error == 0 | |
2909 | */ | |
2910 | if (!error && !(p_s_sb->s_flags & MS_RDONLY)) { | |
2911 | @@ -1813,66 +2160,6 @@ | |
2912 | return 0 ; | |
2913 | } | |
2914 | ||
2915 | - | |
2916 | -struct reiserfs_journal_commit_task { | |
2917 | - struct super_block *p_s_sb ; | |
2918 | - int jindex ; | |
2919 | - int wake_on_finish ; /* if this is one, we wake the task_done queue, if it | |
2920 | - ** is zero, we free the whole struct on finish | |
2921 | - */ | |
2922 | - struct reiserfs_journal_commit_task *self ; | |
2923 | - struct wait_queue *task_done ; | |
2924 | - struct tq_struct task ; | |
2925 | -} ; | |
2926 | - | |
2927 | -static void reiserfs_journal_commit_task_func(struct reiserfs_journal_commit_task *ct) { | |
2928 | - | |
2929 | - struct reiserfs_journal_list *jl ; | |
2930 | - jl = SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex ; | |
2931 | - | |
2932 | - flush_commit_list(ct->p_s_sb, SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex, 1) ; | |
2933 | - | |
2934 | - if (jl->j_len > 0 && atomic_read(&(jl->j_nonzerolen)) > 0 && | |
2935 | - atomic_read(&(jl->j_commit_left)) == 0) { | |
2936 | - kupdate_one_transaction(ct->p_s_sb, jl) ; | |
2937 | - } | |
2938 | - reiserfs_kfree(ct->self, sizeof(struct reiserfs_journal_commit_task), ct->p_s_sb) ; | |
2939 | -} | |
2940 | - | |
2941 | -static void setup_commit_task_arg(struct reiserfs_journal_commit_task *ct, | |
2942 | - struct super_block *p_s_sb, | |
2943 | - int jindex) { | |
2944 | - if (!ct) { | |
2945 | - reiserfs_panic(NULL, "journal-1360: setup_commit_task_arg called with NULL struct\n") ; | |
2946 | - } | |
2947 | - ct->p_s_sb = p_s_sb ; | |
2948 | - ct->jindex = jindex ; | |
2949 | - ct->task_done = NULL ; | |
2950 | - INIT_LIST_HEAD(&ct->task.list) ; | |
2951 | - ct->task.sync = 0 ; | |
2952 | - ct->task.routine = (void *)(void *)reiserfs_journal_commit_task_func ; | |
2953 | - ct->self = ct ; | |
2954 | - ct->task.data = (void *)ct ; | |
2955 | -} | |
2956 | - | |
2957 | -static void commit_flush_async(struct super_block *p_s_sb, int jindex) { | |
2958 | - struct reiserfs_journal_commit_task *ct ; | |
2959 | - /* using GFP_NOFS, GFP_KERNEL could try to flush inodes, which will try | |
2960 | - ** to start/join a transaction, which will deadlock | |
2961 | - */ | |
2962 | - ct = reiserfs_kmalloc(sizeof(struct reiserfs_journal_commit_task), GFP_NOFS, p_s_sb) ; | |
2963 | - if (ct) { | |
2964 | - setup_commit_task_arg(ct, p_s_sb, jindex) ; | |
2965 | - queue_task(&(ct->task), &reiserfs_commit_thread_tq); | |
2966 | - wake_up(&reiserfs_commit_thread_wait) ; | |
2967 | - } else { | |
2968 | -#ifdef CONFIG_REISERFS_CHECK | |
2969 | - reiserfs_warning(p_s_sb, "journal-1540: kmalloc failed, doing sync commit\n") ; | |
2970 | -#endif | |
2971 | - flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1) ; | |
2972 | - } | |
2973 | -} | |
2974 | - | |
2975 | /* | |
2976 | ** this is the commit thread. It is started with kernel_thread on | |
2977 | ** FS mount, and journal_release() waits for it to exit. | |
2978 | @@ -1885,6 +2172,9 @@ | |
2979 | ** then run the per filesystem commit task queue when we wakeup. | |
2980 | */ | |
2981 | static int reiserfs_journal_commit_thread(void *nullp) { | |
2982 | + struct list_head *entry, *safe ; | |
2983 | + struct super_block *s; | |
2984 | + time_t last_run = 0; | |
2985 | ||
2986 | daemonize() ; | |
2987 | ||
2988 | @@ -1897,13 +2187,73 @@ | |
2989 | lock_kernel() ; | |
2990 | while(1) { | |
2991 | ||
2992 | - while(TQ_ACTIVE(reiserfs_commit_thread_tq)) { | |
2993 | - run_task_queue(&reiserfs_commit_thread_tq) ; | |
2994 | +restart: | |
2995 | + down(&kreiserfsd_sem); | |
2996 | + list_for_each_safe(entry, safe, &kreiserfsd_supers) { | |
2997 | + s = list_entry(entry, struct super_block, | |
2998 | + u.reiserfs_sb.s_reiserfs_supers); | |
2999 | + if (!(s->s_flags & MS_RDONLY)) { | |
3000 | + flush_async_commits(s); | |
3001 | + | |
3002 | + if (CURRENT_TIME - last_run > 5) { | |
3003 | + reiserfs_flush_old_commits(s); | |
3004 | + } | |
3005 | + | |
3006 | + if (!list_empty(&SB_JOURNAL(s)->j_working_list)) { | |
3007 | + struct reiserfs_journal_list *jl, *tjl; | |
3008 | + unsigned long trans_id ; | |
3009 | + unsigned long start; | |
3010 | + unsigned long cur_start; | |
3011 | + unsigned long nfract = SB_ONDISK_JOURNAL_SIZE(s) / 4; | |
3012 | + int ret; | |
3013 | + | |
3014 | + jl = JOURNAL_WORK_ENTRY(SB_JOURNAL(s)->j_working_list.next); | |
3015 | + cur_start = SB_JOURNAL(s)->j_start; | |
3016 | + start = jl->j_start; | |
3017 | + | |
3018 | + /* pretend the log doesn't actually wrap */ | |
3019 | + if (cur_start < start) { | |
3020 | + cur_start = cur_start + SB_ONDISK_JOURNAL_SIZE(s); | |
3021 | + } | |
3022 | + | |
3023 | + /* if the first transaction on the working list is more | |
3024 | + * than nfract blocks away from the current transaction start | |
3025 | + * or there are more than 128 working lists, start | |
3026 | + * or there are more than 32 working lists, start | |
3027 | + */ | |
3028 | + if (cur_start - start > nfract || | |
3029 | + SB_JOURNAL(s)->j_num_work_lists > 32) { | |
3030 | + tjl=JOURNAL_LIST_ENTRY(SB_JOURNAL(s)->j_journal_list.next); | |
3031 | + ret = kupdate_transactions(s, jl, &tjl, &trans_id,32,128); | |
3032 | + } | |
3033 | + } | |
3034 | + } | |
3035 | } | |
3036 | + /* check again for new async commits that need tending */ | |
3037 | + list_for_each_safe(entry, safe, &kreiserfsd_supers) { | |
3038 | + s = list_entry(entry, struct super_block, | |
3039 | + u.reiserfs_sb.s_reiserfs_supers); | |
3040 | + if (!list_empty(&SB_JOURNAL(s)->j_journal_list)) { | |
3041 | + struct reiserfs_journal_list *jl; | |
3042 | + struct list_head *entry; | |
3043 | + | |
3044 | + /* last entry is the youngest, commit it and you get everything */ | |
3045 | + entry = SB_JOURNAL(s)->j_journal_list.prev; | |
3046 | + jl = JOURNAL_LIST_ENTRY(entry); | |
3047 | + if (!atomic_read(&(jl->j_older_commits_done))) { | |
3048 | + /* give new mounts a chance to come in */ | |
3049 | + up(&kreiserfsd_sem); | |
3050 | + last_run = CURRENT_TIME; | |
3051 | + wake_up_all(&reiserfs_commit_thread_done) ; | |
3052 | + goto restart; | |
3053 | + } | |
3054 | + } | |
3055 | + } | |
3056 | + up(&kreiserfsd_sem); | |
3057 | + last_run = CURRENT_TIME; | |
3058 | ||
3059 | /* if there aren't any more filesystems left, break */ | |
3060 | if (reiserfs_mounted_fs_count <= 0) { | |
3061 | - run_task_queue(&reiserfs_commit_thread_tq) ; | |
3062 | break ; | |
3063 | } | |
3064 | wake_up(&reiserfs_commit_thread_done) ; | |
3065 | @@ -1914,12 +2264,28 @@ | |
3066 | return 0 ; | |
3067 | } | |
3068 | ||
3069 | +static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) | |
3070 | +{ | |
3071 | + struct reiserfs_journal_list *jl; | |
3072 | +retry: | |
3073 | + // jl = (struct reiserfs_journal_list *)kmem_cache_alloc(journal_list_cachep, SLAB_NOFS); | |
3074 | + jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS, s); | |
3075 | + if (!jl) { | |
3076 | + yield(); | |
3077 | + goto retry; | |
3078 | + } | |
3079 | + memset(jl, 0, sizeof(*jl)); | |
3080 | + INIT_LIST_HEAD(&jl->j_list); | |
3081 | + INIT_LIST_HEAD(&jl->j_working_list); | |
3082 | + INIT_LIST_HEAD(&jl->j_ordered_bh_list); | |
3083 | + INIT_LIST_HEAD(&jl->j_tail_bh_list); | |
3084 | + sema_init(&jl->j_commit_lock, 1); | |
3085 | + SB_JOURNAL(s)->j_num_lists++; | |
3086 | + return jl; | |
3087 | +} | |
3088 | + | |
3089 | static void journal_list_init(struct super_block *p_s_sb) { | |
3090 | - int i ; | |
3091 | - for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { | |
3092 | - init_waitqueue_head(&(SB_JOURNAL_LIST(p_s_sb)[i].j_commit_wait)) ; | |
3093 | - init_waitqueue_head(&(SB_JOURNAL_LIST(p_s_sb)[i].j_flush_wait)) ; | |
3094 | - } | |
3095 | + SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb); | |
3096 | } | |
3097 | ||
3098 | static int release_journal_dev( struct super_block *super, | |
3099 | @@ -1952,7 +2318,6 @@ | |
3100 | int blkdev_mode = FMODE_READ | FMODE_WRITE; | |
3101 | ||
3102 | result = 0; | |
3103 | - | |
3104 | journal -> j_dev_bd = NULL; | |
3105 | journal -> j_dev_file = NULL; | |
3106 | jdev = SB_JOURNAL_DEV( super ) = | |
3107 | @@ -2030,7 +2395,6 @@ | |
3108 | printk( "journal_init_dev: journal device: %s", kdevname( SB_JOURNAL_DEV( super ) ) ); | |
3109 | return result; | |
3110 | } | |
3111 | - | |
3112 | /* | |
3113 | ** must be called once on fs mount. calls journal_read for you | |
3114 | */ | |
3115 | @@ -2041,6 +2405,7 @@ | |
3116 | struct reiserfs_super_block * rs; | |
3117 | struct reiserfs_journal_header *jh; | |
3118 | struct reiserfs_journal *journal; | |
3119 | + struct reiserfs_journal_list *jl; | |
3120 | ||
3121 | if (sizeof(struct reiserfs_journal_commit) != 4096 || | |
3122 | sizeof(struct reiserfs_journal_desc) != 4096) { | |
3123 | @@ -2054,7 +2419,6 @@ | |
3124 | reiserfs_warning(p_s_sb, "Journal size %d is less than 512+1 blocks, which unsupported\n", SB_ONDISK_JOURNAL_SIZE(p_s_sb)); | |
3125 | return 1 ; | |
3126 | } | |
3127 | - | |
3128 | journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof (struct reiserfs_journal)) ; | |
3129 | if (!journal) { | |
3130 | reiserfs_warning(p_s_sb, "journal-1256: unable to get memory for journal structure\n") ; | |
3131 | @@ -2155,15 +2519,9 @@ | |
3132 | SB_JOURNAL_MAX_BATCH(p_s_sb) = SB_JOURNAL_TRANS_MAX(p_s_sb)*9 / 10; | |
3133 | } | |
3134 | } | |
3135 | - | |
3136 | brelse (bhjh); | |
3137 | ||
3138 | SB_JOURNAL(p_s_sb)->j_list_bitmap_index = 0 ; | |
3139 | - SB_JOURNAL_LIST_INDEX(p_s_sb) = -10000 ; /* make sure flush_old_commits does not try to flush a list while replay is on */ | |
3140 | - | |
3141 | - /* clear out the journal list array */ | |
3142 | - memset(SB_JOURNAL_LIST(p_s_sb), 0, | |
3143 | - sizeof(struct reiserfs_journal_list) * JOURNAL_LIST_COUNT) ; | |
3144 | ||
3145 | journal_list_init(p_s_sb) ; | |
3146 | ||
3147 | @@ -2171,8 +2529,6 @@ | |
3148 | JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ; | |
3149 | memset(journal_writers, 0, sizeof(char *) * 512) ; /* debug code */ | |
3150 | ||
3151 | - INIT_LIST_HEAD(&(SB_JOURNAL(p_s_sb)->j_dirty_buffers)) ; | |
3152 | - | |
3153 | SB_JOURNAL(p_s_sb)->j_start = 0 ; | |
3154 | SB_JOURNAL(p_s_sb)->j_len = 0 ; | |
3155 | SB_JOURNAL(p_s_sb)->j_len_alloc = 0 ; | |
3156 | @@ -2182,13 +2538,15 @@ | |
3157 | SB_JOURNAL(p_s_sb)->j_last = NULL ; | |
3158 | SB_JOURNAL(p_s_sb)->j_first = NULL ; | |
3159 | init_waitqueue_head(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; | |
3160 | - init_waitqueue_head(&(SB_JOURNAL(p_s_sb)->j_wait)) ; | |
3161 | - | |
3162 | + sema_init(&SB_JOURNAL(p_s_sb)->j_lock, 1); | |
3163 | + sema_init(&SB_JOURNAL(p_s_sb)->j_flush_sem, 1); | |
3164 | + INIT_LIST_HEAD (&SB_JOURNAL(p_s_sb)->j_journal_list); | |
3165 | + INIT_LIST_HEAD (&SB_JOURNAL(p_s_sb)->j_working_list); | |
3166 | + | |
3167 | SB_JOURNAL(p_s_sb)->j_trans_id = 10 ; | |
3168 | SB_JOURNAL(p_s_sb)->j_mount_id = 10 ; | |
3169 | SB_JOURNAL(p_s_sb)->j_state = 0 ; | |
3170 | atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ; | |
3171 | - atomic_set(&(SB_JOURNAL(p_s_sb)->j_wlock), 0) ; | |
3172 | SB_JOURNAL(p_s_sb)->j_cnode_free_list = allocate_cnodes(num_cnodes) ; | |
3173 | SB_JOURNAL(p_s_sb)->j_cnode_free_orig = SB_JOURNAL(p_s_sb)->j_cnode_free_list ; | |
3174 | SB_JOURNAL(p_s_sb)->j_cnode_free = SB_JOURNAL(p_s_sb)->j_cnode_free_list ? | |
3175 | @@ -2196,8 +2554,9 @@ | |
3176 | SB_JOURNAL(p_s_sb)->j_cnode_used = 0 ; | |
3177 | SB_JOURNAL(p_s_sb)->j_must_wait = 0 ; | |
3178 | init_journal_hash(p_s_sb) ; | |
3179 | - SB_JOURNAL_LIST(p_s_sb)[0].j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL_LIST(p_s_sb)) ; | |
3180 | - if (!(SB_JOURNAL_LIST(p_s_sb)[0].j_list_bitmap)) { | |
3181 | + jl = SB_JOURNAL(p_s_sb)->j_current_jl; | |
3182 | + jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl) ; | |
3183 | + if (!jl->j_list_bitmap) { | |
3184 | reiserfs_warning(p_s_sb, "journal-2005, get_list_bitmap failed for journal list 0\n") ; | |
3185 | goto free_and_return; | |
3186 | } | |
3187 | @@ -2205,8 +2564,6 @@ | |
3188 | reiserfs_warning(p_s_sb, "Replay Failure, unable to mount\n") ; | |
3189 | goto free_and_return; | |
3190 | } | |
3191 | - /* once the read is done, we can set this where it belongs */ | |
3192 | - SB_JOURNAL_LIST_INDEX(p_s_sb) = 0 ; | |
3193 | ||
3194 | if (reiserfs_dont_log (p_s_sb)) | |
3195 | return 0; | |
3196 | @@ -2216,6 +2573,9 @@ | |
3197 | kernel_thread((void *)(void *)reiserfs_journal_commit_thread, NULL, | |
3198 | CLONE_FS | CLONE_FILES | CLONE_VM) ; | |
3199 | } | |
3200 | + down(&kreiserfsd_sem); | |
3201 | + list_add(&p_s_sb->u.reiserfs_sb.s_reiserfs_supers, &kreiserfsd_supers); | |
3202 | + up(&kreiserfsd_sem); | |
3203 | return 0 ; | |
3204 | ||
3205 | free_and_return: | |
3206 | @@ -2230,7 +2590,9 @@ | |
3207 | */ | |
3208 | int journal_transaction_should_end(struct reiserfs_transaction_handle *th, int new_alloc) { | |
3209 | time_t now = CURRENT_TIME ; | |
3210 | - if (reiserfs_dont_log(th->t_super)) | |
3211 | + | |
3212 | + /* cannot restart while nested unless the parent allows it */ | |
3213 | + if (!reiserfs_restartable_handle(th) && th->t_refcount > 1) | |
3214 | return 0 ; | |
3215 | if ( SB_JOURNAL(th->t_super)->j_must_wait > 0 || | |
3216 | (SB_JOURNAL(th->t_super)->j_len_alloc + new_alloc) >= SB_JOURNAL_MAX_BATCH(th->t_super) || | |
3217 | @@ -2239,9 +2601,48 @@ | |
3218 | SB_JOURNAL(th->t_super)->j_cnode_free < (SB_JOURNAL_TRANS_MAX(th->t_super) * 3)) { | |
3219 | return 1 ; | |
3220 | } | |
3221 | + | |
3222 | + /* we are allowing them to continue in the current transaction, so | |
3223 | + * we have to bump the blocks allocated now. | |
3224 | + */ | |
3225 | + th->t_blocks_allocated += new_alloc; | |
3226 | + SB_JOURNAL(th->t_super)->j_len_alloc += new_alloc; | |
3227 | + | |
3228 | return 0 ; | |
3229 | } | |
3230 | ||
3231 | +int | |
3232 | +reiserfs_restart_transaction(struct reiserfs_transaction_handle *th, int num) { | |
3233 | + int refcount = th->t_refcount ; | |
3234 | + struct super_block *s = th->t_super ; | |
3235 | + int flags = th->t_flags ; | |
3236 | + int parent_flags = 0; | |
3237 | + struct reiserfs_transaction_handle *saved_th = current->journal_info ; | |
3238 | + | |
3239 | + /* if refcount is > 1, saved_th is the parent we've nested into, save | |
3240 | + ** his flags as well. So far, only intermezzo needs this, 99% of the | |
3241 | + ** time it is horribly unsafe. | |
3242 | + */ | |
3243 | + if (refcount > 1) { | |
3244 | + if (!reiserfs_restartable_handle(saved_th)) { | |
3245 | + BUG() ; | |
3246 | + } | |
3247 | + th->t_refcount = 1; | |
3248 | + parent_flags = saved_th->t_flags ; | |
3249 | + } | |
3250 | + th->t_flags = 0 ; | |
3251 | + journal_end(th, s, th->t_blocks_allocated) ; | |
3252 | + journal_begin(th, s, num) ; | |
3253 | + th->t_flags = flags; | |
3254 | + if (refcount > 1) { | |
3255 | + current->journal_info = saved_th ; | |
3256 | + th->t_refcount = refcount ; | |
3257 | + memcpy(saved_th, th, sizeof(*th)) ; | |
3258 | + saved_th->t_flags = parent_flags ; | |
3259 | + } | |
3260 | + return 0 ; | |
3261 | +} | |
3262 | + | |
3263 | /* this must be called inside a transaction, and requires the | |
3264 | ** kernel_lock to be held | |
3265 | */ | |
3266 | @@ -2268,6 +2669,37 @@ | |
3267 | !test_bit(WRITERS_BLOCKED, &SB_JOURNAL(s)->j_state)) ; | |
3268 | } | |
3269 | ||
3270 | +static void queue_log_writer(struct super_block *s) { | |
3271 | + set_bit(WRITERS_QUEUED, &SB_JOURNAL(s)->j_state); | |
3272 | + sleep_on(&SB_JOURNAL(s)->j_join_wait); | |
3273 | +} | |
3274 | + | |
3275 | +static void wake_queued_writers(struct super_block *s) { | |
3276 | + if (test_and_clear_bit(WRITERS_QUEUED, &SB_JOURNAL(s)->j_state)) { | |
3277 | + wake_up(&SB_JOURNAL(s)->j_join_wait); | |
3278 | + } | |
3279 | +} | |
3280 | + | |
3281 | +static void let_transaction_grow(struct super_block *sb, | |
3282 | + unsigned long trans_id) | |
3283 | +{ | |
3284 | + unsigned long bcount = SB_JOURNAL(sb)->j_bcount; | |
3285 | + while(1) { | |
3286 | + yield(); | |
3287 | + while ((atomic_read(&SB_JOURNAL(sb)->j_wcount) > 0 || | |
3288 | + atomic_read(&SB_JOURNAL(sb)->j_jlock)) && | |
3289 | + SB_JOURNAL(sb)->j_trans_id == trans_id) { | |
3290 | + queue_log_writer(sb); | |
3291 | + } | |
3292 | + if (SB_JOURNAL(sb)->j_trans_id != trans_id) | |
3293 | + break; | |
3294 | + if (bcount == SB_JOURNAL(sb)->j_bcount) | |
3295 | + break; | |
3296 | + bcount = SB_JOURNAL(sb)->j_bcount; | |
3297 | + } | |
3298 | +} | |
3299 | + | |
3300 | + | |
3301 | /* join == true if you must join an existing transaction. | |
3302 | ** join == false if you can deal with waiting for others to finish | |
3303 | ** | |
3304 | @@ -2275,8 +2707,10 @@ | |
3305 | ** expect to use in nblocks. | |
3306 | */ | |
3307 | static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb,unsigned long nblocks,int join) { | |
3308 | - time_t now = CURRENT_TIME ; | |
3309 | + time_t now ; | |
3310 | int old_trans_id ; | |
3311 | + struct reiserfs_transaction_handle myth ; | |
3312 | + int sched_count = 0; | |
3313 | ||
3314 | reiserfs_check_lock_depth("journal_begin") ; | |
3315 | RFALSE( p_s_sb->s_flags & MS_RDONLY, | |
3316 | @@ -2287,9 +2721,14 @@ | |
3317 | return 0 ; | |
3318 | } | |
3319 | PROC_INFO_INC( p_s_sb, journal.journal_being ); | |
3320 | + /* set here for journal_join */ | |
3321 | + th->t_refcount = 1; | |
3322 | + th->t_flags = 0 ; | |
3323 | + th->t_super = p_s_sb ; | |
3324 | ||
3325 | relock: | |
3326 | lock_journal(p_s_sb) ; | |
3327 | + SB_JOURNAL(p_s_sb)->j_bcount++ ; | |
3328 | ||
3329 | if (test_bit(WRITERS_BLOCKED, &SB_JOURNAL(p_s_sb)->j_state)) { | |
3330 | unlock_journal(p_s_sb) ; | |
3331 | @@ -2297,12 +2736,12 @@ | |
3332 | PROC_INFO_INC( p_s_sb, journal.journal_relock_writers ); | |
3333 | goto relock ; | |
3334 | } | |
3335 | + now = CURRENT_TIME; | |
3336 | ||
3337 | /* if there is no room in the journal OR | |
3338 | ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning | |
3339 | ** we don't sleep if there aren't other writers | |
3340 | */ | |
3341 | - | |
3342 | if ( (!join && SB_JOURNAL(p_s_sb)->j_must_wait > 0) || | |
3343 | ( !join && (SB_JOURNAL(p_s_sb)->j_len_alloc + nblocks + 2) >= SB_JOURNAL_MAX_BATCH(p_s_sb)) || | |
3344 | (!join && atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) > 0 && SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 && | |
3345 | @@ -2310,54 +2749,128 @@ | |
3346 | (!join && atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) ) || | |
3347 | (!join && SB_JOURNAL(p_s_sb)->j_cnode_free < (SB_JOURNAL_TRANS_MAX(p_s_sb) * 3))) { | |
3348 | ||
3349 | + old_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ; | |
3350 | unlock_journal(p_s_sb) ; /* allow others to finish this transaction */ | |
3351 | ||
3352 | - /* if writer count is 0, we can just force this transaction to end, and start | |
3353 | - ** a new one afterwards. | |
3354 | - */ | |
3355 | - if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0) { | |
3356 | - struct reiserfs_transaction_handle myth ; | |
3357 | - journal_join(&myth, p_s_sb, 1) ; | |
3358 | - reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; | |
3359 | - journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; | |
3360 | - do_journal_end(&myth, p_s_sb,1,COMMIT_NOW) ; | |
3361 | + if (!join && (SB_JOURNAL(p_s_sb)->j_len_alloc + nblocks + 2) >= | |
3362 | + SB_JOURNAL_MAX_BATCH(p_s_sb) && | |
3363 | + ((SB_JOURNAL(p_s_sb)->j_len + nblocks + 2) * 100) < | |
3364 | + (SB_JOURNAL(p_s_sb)->j_len_alloc * 75)) | |
3365 | + { | |
3366 | + if (atomic_read(&SB_JOURNAL(p_s_sb)->j_wcount) > 10) { | |
3367 | + sched_count++; | |
3368 | + queue_log_writer(p_s_sb); | |
3369 | + goto relock; | |
3370 | + } | |
3371 | + } | |
3372 | + /* don't mess with joining the transaction if all we have to do is | |
3373 | + * wait for someone else to do a commit | |
3374 | + */ | |
3375 | + if (atomic_read(&SB_JOURNAL(p_s_sb)->j_jlock)) { | |
3376 | + while (SB_JOURNAL(p_s_sb)->j_trans_id == old_trans_id && | |
3377 | + atomic_read(&SB_JOURNAL(p_s_sb)->j_jlock)) { | |
3378 | + queue_log_writer(p_s_sb); | |
3379 | + } | |
3380 | + goto relock; | |
3381 | + } | |
3382 | + journal_join(&myth, p_s_sb, 1) ; | |
3383 | + | |
3384 | + /* someone might have ended the transaction while we joined */ | |
3385 | + if (old_trans_id != SB_JOURNAL(p_s_sb)->j_trans_id) { | |
3386 | + do_journal_end(&myth, p_s_sb, 1, 0) ; | |
3387 | } else { | |
3388 | - /* but if the writer count isn't zero, we have to wait for the current writers to finish. | |
3389 | - ** They won't batch on transaction end once we set j_jlock | |
3390 | - */ | |
3391 | - atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ; | |
3392 | - old_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ; | |
3393 | - while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) && | |
3394 | - SB_JOURNAL(p_s_sb)->j_trans_id == old_trans_id) { | |
3395 | - sleep_on(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; | |
3396 | - } | |
3397 | + do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW) ; | |
3398 | } | |
3399 | PROC_INFO_INC( p_s_sb, journal.journal_relock_wcount ); | |
3400 | goto relock ; | |
3401 | } | |
3402 | ||
3403 | if (SB_JOURNAL(p_s_sb)->j_trans_start_time == 0) { /* we are the first writer, set trans_id */ | |
3404 | - SB_JOURNAL(p_s_sb)->j_trans_start_time = now ; | |
3405 | + SB_JOURNAL(p_s_sb)->j_trans_start_time = CURRENT_TIME; | |
3406 | } | |
3407 | atomic_inc(&(SB_JOURNAL(p_s_sb)->j_wcount)) ; | |
3408 | SB_JOURNAL(p_s_sb)->j_len_alloc += nblocks ; | |
3409 | th->t_blocks_logged = 0 ; | |
3410 | th->t_blocks_allocated = nblocks ; | |
3411 | - th->t_super = p_s_sb ; | |
3412 | th->t_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ; | |
3413 | - th->t_caller = "Unknown" ; | |
3414 | + reiserfs_set_handle_active(th) ; | |
3415 | unlock_journal(p_s_sb) ; | |
3416 | - p_s_sb->s_dirt = 1; | |
3417 | return 0 ; | |
3418 | } | |
3419 | ||
3420 | +struct reiserfs_transaction_handle * | |
3421 | +reiserfs_persistent_transaction(struct super_block *s, unsigned long nblocks) { | |
3422 | + int ret ; | |
3423 | + struct reiserfs_transaction_handle *th ; | |
3424 | ||
3425 | + /* if we're nesting into an existing transaction. It will be | |
3426 | + ** persistent on its own | |
3427 | + */ | |
3428 | + if (reiserfs_transaction_running(s)) { | |
3429 | + th = current->journal_info ; | |
3430 | + th->t_refcount++ ; | |
3431 | + if (th->t_refcount < 2) { | |
3432 | + BUG() ; | |
3433 | + } | |
3434 | + return th ; | |
3435 | + } | |
3436 | + th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS, s) ; | |
3437 | + if (!th) { | |
3438 | + return ERR_PTR(-ENOMEM) ; | |
3439 | + } | |
3440 | + ret = journal_begin(th, s, nblocks) ; | |
3441 | + if (ret) { | |
3442 | + reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), s) ; | |
3443 | + return ERR_PTR(ret) ; | |
3444 | + } | |
3445 | + /* do_journal_end is now responsible for freeing the handle */ | |
3446 | + reiserfs_set_handle_persistent(th) ; | |
3447 | + return th ; | |
3448 | +} | |
3449 | static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { | |
3450 | + struct reiserfs_transaction_handle *cur_th = current->journal_info; | |
3451 | + | |
3452 | + /* this keeps do_journal_end from NULLing out the current->journal_info | |
3453 | + ** pointer | |
3454 | + */ | |
3455 | + th->t_handle_save = cur_th ; | |
3456 | + if (cur_th && cur_th->t_refcount > 1) { | |
3457 | + BUG() ; | |
3458 | + } | |
3459 | return do_journal_begin_r(th, p_s_sb, nblocks, 1) ; | |
3460 | } | |
3461 | ||
3462 | int journal_begin(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks) { | |
3463 | - return do_journal_begin_r(th, p_s_sb, nblocks, 0) ; | |
3464 | + struct reiserfs_transaction_handle *cur_th = current->journal_info ; | |
3465 | + int ret ; | |
3466 | + | |
3467 | + th->t_handle_save = NULL ; | |
3468 | + if (cur_th) { | |
3469 | + /* we are nesting into the current transaction */ | |
3470 | + if (cur_th->t_super == p_s_sb) { | |
3471 | + cur_th->t_refcount++ ; | |
3472 | + memcpy(th, cur_th, sizeof(*th)); | |
3473 | + th->t_flags = 0 ; | |
3474 | + reiserfs_set_handle_active(th) ; | |
3475 | + if (th->t_refcount <= 1) | |
3476 | + printk("BAD: refcount <= 1, but journal_info != 0\n"); | |
3477 | + return 0; | |
3478 | + } else { | |
3479 | + /* we've ended up with a handle from a different filesystem. | |
3480 | + ** save it and restore on journal_end. This should never | |
3481 | + ** really happen... | |
3482 | + */ | |
3483 | + reiserfs_warning(p_s_sb, "clm-2100: nesting info a different FS\n") ; | |
3484 | + th->t_handle_save = current->journal_info ; | |
3485 | + current->journal_info = th; | |
3486 | + } | |
3487 | + } else { | |
3488 | + current->journal_info = th; | |
3489 | + } | |
3490 | + ret = do_journal_begin_r(th, p_s_sb, nblocks, 0) ; | |
3491 | + if (current->journal_info != th) | |
3492 | + BUG() ; | |
3493 | + return ret ; | |
3494 | } | |
3495 | ||
3496 | /* not used at all */ | |
3497 | @@ -2389,7 +2902,7 @@ | |
3498 | reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n", | |
3499 | th->t_trans_id, SB_JOURNAL(p_s_sb)->j_trans_id); | |
3500 | } | |
3501 | - p_s_sb->s_dirt = 1 ; | |
3502 | + p_s_sb->s_dirt = 1; | |
3503 | ||
3504 | prepared = test_and_clear_bit(BH_JPrepared, &bh->b_state) ; | |
3505 | /* already in this transaction, we are done */ | |
3506 | @@ -2413,6 +2926,7 @@ | |
3507 | ||
3508 | if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0) { | |
3509 | reiserfs_warning(p_s_sb, "journal-1409: journal_mark_dirty returning because j_wcount was %d\n", atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount))) ; | |
3510 | + BUG(); | |
3511 | return 1 ; | |
3512 | } | |
3513 | /* this error means I've screwed up, and we've overflowed the transaction. | |
3514 | @@ -2479,25 +2993,36 @@ | |
3515 | return 0 ; | |
3516 | } | |
3517 | ||
3518 | -/* | |
3519 | -** if buffer already in current transaction, do a journal_mark_dirty | |
3520 | -** otherwise, just mark it dirty and move on. Used for writes to meta blocks | |
3521 | -** that don't need journaling | |
3522 | -*/ | |
3523 | -int journal_mark_dirty_nolog(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, struct buffer_head *bh) { | |
3524 | - if (reiserfs_dont_log(th->t_super) || buffer_journaled(bh) || | |
3525 | - buffer_journal_dirty(bh)) { | |
3526 | - return journal_mark_dirty(th, p_s_sb, bh) ; | |
3527 | - } | |
3528 | - if (get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_list_hash_table, bh->b_dev,bh->b_blocknr,bh->b_size)) { | |
3529 | - return journal_mark_dirty(th, p_s_sb, bh) ; | |
3530 | - } | |
3531 | - mark_buffer_dirty(bh) ; | |
3532 | - return 0 ; | |
3533 | -} | |
3534 | - | |
3535 | int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { | |
3536 | - return do_journal_end(th, p_s_sb, nblocks, 0) ; | |
3537 | + | |
3538 | + int ret; | |
3539 | + if (!current->journal_info && th->t_refcount > 1) | |
3540 | + printk("REISER-NESTING: th NULL, refcount %d\n", th->t_refcount); | |
3541 | + if (th->t_refcount > 1) { | |
3542 | + struct reiserfs_transaction_handle *cur_th = current->journal_info ; | |
3543 | + | |
3544 | + /* we aren't allowed to close a nested transaction on a different | |
3545 | + ** filesystem from the one in the task struct | |
3546 | + */ | |
3547 | + if (cur_th->t_super != th->t_super) | |
3548 | + BUG() ; | |
3549 | + | |
3550 | + th->t_refcount--; | |
3551 | + if (th != cur_th) { | |
3552 | + int flags = cur_th->t_flags ; | |
3553 | + /* nested handles are never persistent */ | |
3554 | + if (reiserfs_persistent_handle(th)) { | |
3555 | + BUG() ; | |
3556 | + } | |
3557 | + memcpy(cur_th, th, sizeof(*th)); | |
3558 | + th->t_flags = 0 ; | |
3559 | + cur_th->t_flags = flags ; | |
3560 | + } | |
3561 | + ret = 0; | |
3562 | + } else { | |
3563 | + ret = do_journal_end(th, p_s_sb, nblocks, 0) ; | |
3564 | + } | |
3565 | + return ret; | |
3566 | } | |
3567 | ||
3568 | /* removes from the current transaction, relsing and descrementing any counters. | |
3569 | @@ -2600,6 +3125,10 @@ | |
3570 | */ | |
3571 | int journal_end_sync(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { | |
3572 | ||
3573 | + /* you are not allowed to sync while nested, very, very bad */ | |
3574 | + if (th->t_refcount > 1) { | |
3575 | + BUG() ; | |
3576 | + } | |
3577 | if (SB_JOURNAL(p_s_sb)->j_len == 0) { | |
3578 | reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; | |
3579 | journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; | |
3580 | @@ -2624,12 +3153,14 @@ | |
3581 | ** | |
3582 | */ | |
3583 | void flush_async_commits(struct super_block *p_s_sb) { | |
3584 | - int i ; | |
3585 | + struct reiserfs_journal_list *jl; | |
3586 | + struct list_head *entry; | |
3587 | ||
3588 | - for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { | |
3589 | - if (i != SB_JOURNAL_LIST_INDEX(p_s_sb)) { | |
3590 | - flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1) ; | |
3591 | - } | |
3592 | + if (!list_empty(&SB_JOURNAL(p_s_sb)->j_journal_list)) { | |
3593 | + /* last entry is the youngest, commit it and you get everything */ | |
3594 | + entry = SB_JOURNAL(p_s_sb)->j_journal_list.prev; | |
3595 | + jl = JOURNAL_LIST_ENTRY(entry); | |
3596 | + flush_commit_list(p_s_sb, jl, 1); | |
3597 | } | |
3598 | } | |
3599 | ||
3600 | @@ -2637,58 +3168,39 @@ | |
3601 | ** flushes any old transactions to disk | |
3602 | ** ends the current transaction if it is too old | |
3603 | ** | |
3604 | -** also calls flush_journal_list with old_only == 1, which allows me to reclaim | |
3605 | -** memory and such from the journal lists whose real blocks are all on disk. | |
3606 | -** | |
3607 | -** called by sync_dev_journal from buffer.c | |
3608 | */ | |
3609 | -int flush_old_commits(struct super_block *p_s_sb, int immediate) { | |
3610 | - int i ; | |
3611 | - int count = 0; | |
3612 | - int start ; | |
3613 | - time_t now ; | |
3614 | - struct reiserfs_transaction_handle th ; | |
3615 | - | |
3616 | - start = SB_JOURNAL_LIST_INDEX(p_s_sb) ; | |
3617 | - now = CURRENT_TIME ; | |
3618 | +int reiserfs_flush_old_commits(struct super_block *p_s_sb) { | |
3619 | + time_t now ; | |
3620 | + struct reiserfs_transaction_handle th ; | |
3621 | + | |
3622 | + now = CURRENT_TIME ; | |
3623 | + /* safety check so we don't flush while we are replaying the log during | |
3624 | + * mount | |
3625 | + */ | |
3626 | + if (list_empty(&SB_JOURNAL(p_s_sb)->j_journal_list)) { | |
3627 | + return 0 ; | |
3628 | + } | |
3629 | ||
3630 | - /* safety check so we don't flush while we are replaying the log during mount */ | |
3631 | - if (SB_JOURNAL_LIST_INDEX(p_s_sb) < 0) { | |
3632 | - return 0 ; | |
3633 | - } | |
3634 | - /* starting with oldest, loop until we get to the start */ | |
3635 | - i = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ; | |
3636 | - while(i != start) { | |
3637 | - if (SB_JOURNAL_LIST(p_s_sb)[i].j_len > 0 && ((now - SB_JOURNAL_LIST(p_s_sb)[i].j_timestamp) > SB_JOURNAL_MAX_COMMIT_AGE(p_s_sb) || | |
3638 | - immediate)) { | |
3639 | - /* we have to check again to be sure the current transaction did not change */ | |
3640 | - if (i != SB_JOURNAL_LIST_INDEX(p_s_sb)) { | |
3641 | - flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1) ; | |
3642 | - } | |
3643 | - } | |
3644 | - i = (i + 1) % JOURNAL_LIST_COUNT ; | |
3645 | - count++ ; | |
3646 | - } | |
3647 | - /* now, check the current transaction. If there are no writers, and it is too old, finish it, and | |
3648 | - ** force the commit blocks to disk | |
3649 | - */ | |
3650 | - if (!immediate && atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0 && | |
3651 | - SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 && | |
3652 | - SB_JOURNAL(p_s_sb)->j_len > 0 && | |
3653 | - (now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > SB_JOURNAL_MAX_TRANS_AGE(p_s_sb)) { | |
3654 | - journal_join(&th, p_s_sb, 1) ; | |
3655 | - reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; | |
3656 | - journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; | |
3657 | - do_journal_end(&th, p_s_sb,1, COMMIT_NOW) ; | |
3658 | - } else if (immediate) { /* belongs above, but I wanted this to be very explicit as a special case. If they say to | |
3659 | - flush, we must be sure old transactions hit the disk too. */ | |
3660 | - journal_join(&th, p_s_sb, 1) ; | |
3661 | - reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; | |
3662 | - journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; | |
3663 | - do_journal_end(&th, p_s_sb,1, COMMIT_NOW | WAIT) ; | |
3664 | - } | |
3665 | - reiserfs_journal_kupdate(p_s_sb) ; | |
3666 | - return 0 ; | |
3667 | + /* check the current transaction. If there are no writers, and it is | |
3668 | + * too old, finish it, and force the commit blocks to disk | |
3669 | + */ | |
3670 | + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0 && | |
3671 | + SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 && | |
3672 | + SB_JOURNAL(p_s_sb)->j_len > 0 && | |
3673 | + (now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > | |
3674 | + SB_JOURNAL_MAX_TRANS_AGE(p_s_sb)) | |
3675 | + { | |
3676 | + journal_join(&th, p_s_sb, 1) ; | |
3677 | + reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; | |
3678 | + journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; | |
3679 | + | |
3680 | + /* we're only being called from kreiserfsd, it makes no sense to do | |
3681 | + ** an async commit so that kreiserfsd can do it later | |
3682 | + */ | |
3683 | + do_journal_end(&th, p_s_sb,1, COMMIT_NOW | WAIT) ; | |
3684 | + } | |
3685 | + reiserfs_journal_kupdate(p_s_sb) ; | |
3686 | + return p_s_sb->s_dirt; | |
3687 | } | |
3688 | ||
3689 | /* | |
3690 | @@ -2709,6 +3221,7 @@ | |
3691 | int flush = flags & FLUSH_ALL ; | |
3692 | int commit_now = flags & COMMIT_NOW ; | |
3693 | int wait_on_commit = flags & WAIT ; | |
3694 | + struct reiserfs_journal_list *jl; | |
3695 | ||
3696 | if (th->t_trans_id != SB_JOURNAL(p_s_sb)->j_trans_id) { | |
3697 | reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n", | |
3698 | @@ -2727,8 +3240,9 @@ | |
3699 | if (SB_JOURNAL(p_s_sb)->j_len == 0) { | |
3700 | int wcount = atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) ; | |
3701 | unlock_journal(p_s_sb) ; | |
3702 | - if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) > 0 && wcount <= 0) { | |
3703 | - atomic_dec(&(SB_JOURNAL(p_s_sb)->j_jlock)) ; | |
3704 | + BUG(); | |
3705 | + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) > 0 && wcount <= 0) { | |
3706 | + atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ; | |
3707 | wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; | |
3708 | } | |
3709 | return 0 ; | |
3710 | @@ -2741,24 +3255,37 @@ | |
3711 | */ | |
3712 | if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) > 0) { | |
3713 | if (flush || commit_now) { | |
3714 | - int orig_jindex = SB_JOURNAL_LIST_INDEX(p_s_sb) ; | |
3715 | + unsigned trans_id ; | |
3716 | + | |
3717 | + jl = SB_JOURNAL(p_s_sb)->j_current_jl; | |
3718 | + trans_id = jl->j_trans_id; | |
3719 | + | |
3720 | atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ; | |
3721 | if (flush) { | |
3722 | SB_JOURNAL(p_s_sb)->j_next_full_flush = 1 ; | |
3723 | } | |
3724 | unlock_journal(p_s_sb) ; | |
3725 | + | |
3726 | /* sleep while the current transaction is still j_jlocked */ | |
3727 | - while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) && | |
3728 | - SB_JOURNAL(p_s_sb)->j_trans_id == th->t_trans_id) { | |
3729 | - sleep_on(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; | |
3730 | - } | |
3731 | - if (commit_now) { | |
3732 | - if (wait_on_commit) { | |
3733 | - flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ; | |
3734 | - } else { | |
3735 | - commit_flush_async(p_s_sb, orig_jindex) ; | |
3736 | + while(SB_JOURNAL(p_s_sb)->j_trans_id == trans_id) { | |
3737 | + if (atomic_read(&SB_JOURNAL(p_s_sb)->j_jlock)) { | |
3738 | + queue_log_writer(p_s_sb); | |
3739 | + } else { | |
3740 | + lock_journal(p_s_sb); | |
3741 | + if (SB_JOURNAL(p_s_sb)->j_trans_id == trans_id) { | |
3742 | + atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ; | |
3743 | + } | |
3744 | + unlock_journal(p_s_sb); | |
3745 | } | |
3746 | } | |
3747 | + if (SB_JOURNAL(p_s_sb)->j_trans_id == trans_id) { | |
3748 | + BUG(); | |
3749 | + } | |
3750 | + if (commit_now && journal_list_still_alive(p_s_sb, trans_id) && | |
3751 | + wait_on_commit) | |
3752 | + { | |
3753 | + flush_commit_list(p_s_sb, jl, 1) ; | |
3754 | + } | |
3755 | return 0 ; | |
3756 | } | |
3757 | unlock_journal(p_s_sb) ; | |
3758 | @@ -2776,8 +3303,8 @@ | |
3759 | if (!(SB_JOURNAL(p_s_sb)->j_must_wait > 0) && !(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock))) && !flush && !commit_now && | |
3760 | (SB_JOURNAL(p_s_sb)->j_len < SB_JOURNAL_MAX_BATCH(p_s_sb)) && | |
3761 | SB_JOURNAL(p_s_sb)->j_len_alloc < SB_JOURNAL_MAX_BATCH(p_s_sb) && SB_JOURNAL(p_s_sb)->j_cnode_free > (SB_JOURNAL_TRANS_MAX(p_s_sb) * 3)) { | |
3762 | - SB_JOURNAL(p_s_sb)->j_bcount++ ; | |
3763 | unlock_journal(p_s_sb) ; | |
3764 | + | |
3765 | return 0 ; | |
3766 | } | |
3767 | ||
3768 | @@ -2807,16 +3334,13 @@ | |
3769 | struct reiserfs_list_bitmap *jb = NULL ; | |
3770 | int cleaned = 0 ; | |
3771 | ||
3772 | - if (reiserfs_dont_log(th->t_super)) { | |
3773 | - bh = sb_get_hash_table(p_s_sb, blocknr) ; | |
3774 | - if (bh && buffer_dirty (bh)) { | |
3775 | - reiserfs_warning (p_s_sb, "journal_mark_freed(dont_log): dirty buffer on hash list: %lx %ld\n", bh->b_state, blocknr); | |
3776 | - BUG (); | |
3777 | - } | |
3778 | - brelse (bh); | |
3779 | - return 0 ; | |
3780 | + cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_hash_table, p_s_sb->s_dev, | |
3781 | + blocknr, p_s_sb->s_blocksize) ; | |
3782 | + if (cn && cn->bh) { | |
3783 | + bh = cn->bh ; | |
3784 | + get_bh(bh) ; | |
3785 | } | |
3786 | - bh = sb_get_hash_table(p_s_sb, blocknr) ; | |
3787 | + | |
3788 | /* if it is journal new, we just remove it from this transaction */ | |
3789 | if (bh && buffer_journal_new(bh)) { | |
3790 | mark_buffer_notjournal_new(bh) ; | |
3791 | @@ -2824,14 +3348,22 @@ | |
3792 | cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ; | |
3793 | } else { | |
3794 | /* set the bit for this block in the journal bitmap for this transaction */ | |
3795 | - jb = SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap ; | |
3796 | + jb = SB_JOURNAL(p_s_sb)->j_current_jl->j_list_bitmap; | |
3797 | if (!jb) { | |
3798 | reiserfs_panic(p_s_sb, "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n") ; | |
3799 | } | |
3800 | - set_bit_in_list_bitmap(p_s_sb, blocknr, jb) ; | |
3801 | ||
3802 | - /* Note, the entire while loop is not allowed to schedule. */ | |
3803 | + /* we set bits in the list bitmap so the block won't be reallocated | |
3804 | + * as a data block which might get flushed before this transaction | |
3805 | + * commits. When data logging is on, the block might get reallocated | |
3806 | + * as a data block, but we know the data block won't get flushed before | |
3807 | + * we commit | |
3808 | + */ | |
3809 | + if (!reiserfs_data_log(p_s_sb)) { | |
3810 | + set_bit_in_list_bitmap(p_s_sb, blocknr, jb) ; | |
3811 | + } | |
3812 | ||
3813 | + /* Note, the entire while loop is not allowed to schedule. */ | |
3814 | if (bh) { | |
3815 | clear_prepared_bits(bh) ; | |
3816 | } | |
3817 | @@ -2876,57 +3408,77 @@ | |
3818 | ||
3819 | void reiserfs_update_inode_transaction(struct inode *inode) { | |
3820 | ||
3821 | - inode->u.reiserfs_i.i_trans_index = SB_JOURNAL_LIST_INDEX(inode->i_sb); | |
3822 | - | |
3823 | + inode->u.reiserfs_i.i_jl = SB_JOURNAL(inode->i_sb)->j_current_jl; | |
3824 | inode->u.reiserfs_i.i_trans_id = SB_JOURNAL(inode->i_sb)->j_trans_id ; | |
3825 | } | |
3826 | ||
3827 | void reiserfs_update_tail_transaction(struct inode *inode) { | |
3828 | ||
3829 | - inode->u.reiserfs_i.i_tail_trans_index = SB_JOURNAL_LIST_INDEX(inode->i_sb); | |
3830 | - | |
3831 | + inode->u.reiserfs_i.i_tail_jl = SB_JOURNAL(inode->i_sb)->j_current_jl; | |
3832 | inode->u.reiserfs_i.i_tail_trans_id = SB_JOURNAL(inode->i_sb)->j_trans_id ; | |
3833 | } | |
3834 | ||
3835 | -static void __commit_trans_index(struct inode *inode, unsigned long id, | |
3836 | - unsigned long index) | |
3837 | +static void __commit_trans_jl(struct inode *inode, unsigned long id, | |
3838 | + struct reiserfs_journal_list *jl) | |
3839 | { | |
3840 | - struct reiserfs_journal_list *jl ; | |
3841 | struct reiserfs_transaction_handle th ; | |
3842 | struct super_block *sb = inode->i_sb ; | |
3843 | ||
3844 | - jl = SB_JOURNAL_LIST(sb) + index; | |
3845 | - | |
3846 | /* is it from the current transaction, or from an unknown transaction? */ | |
3847 | if (id == SB_JOURNAL(sb)->j_trans_id) { | |
3848 | - journal_join(&th, sb, 1) ; | |
3849 | + jl = SB_JOURNAL(sb)->j_current_jl; | |
3850 | + /* try to let other writers come in and grow this transaction */ | |
3851 | + let_transaction_grow(sb, id); | |
3852 | + if (SB_JOURNAL(sb)->j_trans_id != id) { | |
3853 | + goto flush_commit_only; | |
3854 | + } | |
3855 | + | |
3856 | + journal_begin(&th, sb, 1) ; | |
3857 | + | |
3858 | + /* someone might have ended this transaction while we joined */ | |
3859 | + if (SB_JOURNAL(sb)->j_trans_id != id) { | |
3860 | + reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 1) ; | |
3861 | + journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)) ; | |
3862 | + journal_end(&th, sb, 1) ; | |
3863 | + goto flush_commit_only; | |
3864 | + } | |
3865 | + | |
3866 | journal_end_sync(&th, sb, 1) ; | |
3867 | - } else if (jl->j_trans_id == id) { | |
3868 | - flush_commit_list(sb, jl, 1) ; | |
3869 | + | |
3870 | + } else { | |
3871 | + /* this gets tricky, we have to make sure the journal list in | |
3872 | + * the inode still exists. We know the list is still around | |
3873 | + * if we've got a larger transaction id than the oldest list | |
3874 | + */ | |
3875 | +flush_commit_only: | |
3876 | + if (journal_list_still_alive(inode->i_sb, id)) { | |
3877 | + flush_commit_list(sb, jl, 1) ; | |
3878 | + } | |
3879 | } | |
3880 | - /* if the transaction id does not match, this list is long since flushed | |
3881 | - ** and we don't have to do anything here | |
3882 | - */ | |
3883 | + /* otherwise the list is gone, and long since committed */ | |
3884 | } | |
3885 | void reiserfs_commit_for_tail(struct inode *inode) { | |
3886 | unsigned long id = inode->u.reiserfs_i.i_tail_trans_id; | |
3887 | - unsigned long index = inode->u.reiserfs_i.i_tail_trans_index; | |
3888 | + struct reiserfs_journal_list *jl = inode->u.reiserfs_i.i_tail_jl; | |
3889 | ||
3890 | /* for tails, if this info is unset there's nothing to commit */ | |
3891 | - if (id && index) | |
3892 | - __commit_trans_index(inode, id, index); | |
3893 | + if (id && jl) | |
3894 | + __commit_trans_jl(inode, id, jl); | |
3895 | } | |
3896 | void reiserfs_commit_for_inode(struct inode *inode) { | |
3897 | unsigned long id = inode->u.reiserfs_i.i_trans_id; | |
3898 | - unsigned long index = inode->u.reiserfs_i.i_trans_index; | |
3899 | + struct reiserfs_journal_list *jl = inode->u.reiserfs_i.i_jl; | |
3900 | ||
3901 | - /* for the whole inode, assume unset id or index means it was | |
3902 | + /* for the whole inode, assume unset id means it was | |
3903 | * changed in the current transaction. More conservative | |
3904 | */ | |
3905 | - if (!id || !index) | |
3906 | + if (!id || !jl) { | |
3907 | reiserfs_update_inode_transaction(inode) ; | |
3908 | + id = inode->u.reiserfs_i.i_trans_id; | |
3909 | + /* jl will be updated in __commit_trans_jl */ | |
3910 | + } | |
3911 | ||
3912 | - __commit_trans_index(inode, id, index); | |
3913 | + __commit_trans_jl(inode, id, jl); | |
3914 | } | |
3915 | ||
3916 | void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, | |
3917 | @@ -2954,8 +3506,6 @@ | |
3918 | int retry_count = 0 ; | |
3919 | ||
3920 | PROC_INFO_INC( p_s_sb, journal.prepare ); | |
3921 | - if (reiserfs_dont_log (p_s_sb)) | |
3922 | - return; | |
3923 | ||
3924 | while(!test_bit(BH_JPrepared, &bh->b_state) || | |
3925 | (wait && buffer_locked(bh))) { | |
3926 | @@ -2964,16 +3514,37 @@ | |
3927 | return ; | |
3928 | } | |
3929 | set_bit(BH_JPrepared, &bh->b_state) ; | |
3930 | + | |
3931 | if (wait) { | |
3932 | RFALSE( buffer_locked(bh) && cur_tb != NULL, | |
3933 | "waiting while do_balance was running\n") ; | |
3934 | + /* only data buffers are allowed to come in dirty, and they | |
3935 | + * never get run through restore_prepared_buffer. So we can | |
3936 | + * just mark them clean here and know it is safe | |
3937 | + */ | |
3938 | + mark_buffer_clean(bh); | |
3939 | wait_on_buffer(bh) ; | |
3940 | - } | |
3941 | + } | |
3942 | PROC_INFO_INC( p_s_sb, journal.prepare_retry ); | |
3943 | retry_count++ ; | |
3944 | } | |
3945 | } | |
3946 | - | |
3947 | +static void flush_old_journal_lists(struct super_block *s) { | |
3948 | + struct reiserfs_journal_list *jl; | |
3949 | + struct list_head *entry; | |
3950 | + time_t now = CURRENT_TIME; | |
3951 | + | |
3952 | + while(!list_empty(&SB_JOURNAL(s)->j_journal_list)) { | |
3953 | + entry = SB_JOURNAL(s)->j_journal_list.next; | |
3954 | + jl = JOURNAL_LIST_ENTRY(entry); | |
3955 | + /* this check should always be run, to send old lists to disk */ | |
3956 | + if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { | |
3957 | + flush_used_journal_lists(s, jl); | |
3958 | + } else { | |
3959 | + break; | |
3960 | + } | |
3961 | + } | |
3962 | +} | |
3963 | /* | |
3964 | ** long and ugly. If flush, will not return until all commit | |
3965 | ** blocks and all real buffers in the trans are on disk. | |
3966 | @@ -2990,18 +3561,30 @@ | |
3967 | struct buffer_head *c_bh ; /* commit bh */ | |
3968 | struct buffer_head *d_bh ; /* desc bh */ | |
3969 | int cur_write_start = 0 ; /* start index of current log write */ | |
3970 | - int cur_blocks_left = 0 ; /* number of journal blocks left to write */ | |
3971 | int old_start ; | |
3972 | int i ; | |
3973 | - int jindex ; | |
3974 | - int orig_jindex ; | |
3975 | int flush = flags & FLUSH_ALL ; | |
3976 | int commit_now = flags & COMMIT_NOW ; | |
3977 | int wait_on_commit = flags & WAIT ; | |
3978 | struct reiserfs_super_block *rs ; | |
3979 | + struct reiserfs_journal_list *jl, *temp_jl; | |
3980 | + struct list_head *entry, *safe; | |
3981 | + int wakeup_kreiserfsd = 0; | |
3982 | + unsigned long jindex; | |
3983 | + unsigned long commit_trans_id; | |
3984 | + | |
3985 | + if (th->t_refcount > 1) | |
3986 | + BUG() ; | |
3987 | ||
3988 | + reiserfs_check_lock_depth("journal end"); | |
3989 | + current->journal_info = th->t_handle_save; | |
3990 | if (reiserfs_dont_log(th->t_super)) { | |
3991 | - return 0 ; | |
3992 | + goto out ; | |
3993 | + } | |
3994 | + | |
3995 | + if (SB_JOURNAL(p_s_sb)->j_len == 0) { | |
3996 | + reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; | |
3997 | + journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; | |
3998 | } | |
3999 | ||
4000 | lock_journal(p_s_sb) ; | |
4001 | @@ -3018,7 +3601,9 @@ | |
4002 | ** it tells us if we should continue with the journal_end, or just return | |
4003 | */ | |
4004 | if (!check_journal_end(th, p_s_sb, nblocks, flags)) { | |
4005 | - return 0 ; | |
4006 | + p_s_sb->s_dirt = 1; | |
4007 | + wake_queued_writers(p_s_sb); | |
4008 | + goto out ; | |
4009 | } | |
4010 | ||
4011 | /* check_journal_end might set these, check again */ | |
4012 | @@ -3037,8 +3622,11 @@ | |
4013 | } | |
4014 | ||
4015 | #ifdef REISERFS_PREALLOCATE | |
4016 | + /* quota ops might need to nest, setup the journal_info pointer for them */ | |
4017 | + current->journal_info = th ; | |
4018 | reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into | |
4019 | * the transaction */ | |
4020 | + current->journal_info = th->t_handle_save ; | |
4021 | #endif | |
4022 | ||
4023 | rs = SB_DISK_SUPER_BLOCK(p_s_sb) ; | |
4024 | @@ -3059,25 +3647,23 @@ | |
4025 | mark_buffer_uptodate(c_bh, 1) ; | |
4026 | ||
4027 | /* init this journal list */ | |
4028 | - atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_older_commits_done), 0) ; | |
4029 | - SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ; | |
4030 | - SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_timestamp = SB_JOURNAL(p_s_sb)->j_trans_start_time ; | |
4031 | - SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_bh = c_bh ; | |
4032 | - SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_start = SB_JOURNAL(p_s_sb)->j_start ; | |
4033 | - SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_len = SB_JOURNAL(p_s_sb)->j_len ; | |
4034 | - atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_nonzerolen), SB_JOURNAL(p_s_sb)->j_len) ; | |
4035 | - atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_left), SB_JOURNAL(p_s_sb)->j_len + 2); | |
4036 | - SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_realblock = NULL ; | |
4037 | - atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 1) ; | |
4038 | - atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 1) ; | |
4039 | - | |
4040 | - /* which is faster, locking/unlocking at the start and end of the for | |
4041 | - ** or locking once per iteration around the insert_journal_hash? | |
4042 | - ** eitherway, we are write locking insert_journal_hash. The ENTIRE FOR | |
4043 | - ** LOOP MUST not cause schedule to occur. | |
4044 | - */ | |
4045 | + jl = SB_JOURNAL(p_s_sb)->j_current_jl; | |
4046 | + | |
4047 | + /* save the transaction id in case we need to commit it later */ | |
4048 | + commit_trans_id = jl->j_trans_id; | |
4049 | ||
4050 | - /* for each real block, add it to the journal list hash, | |
4051 | + atomic_set(&jl->j_older_commits_done, 0) ; | |
4052 | + jl->j_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ; | |
4053 | + jl->j_timestamp = SB_JOURNAL(p_s_sb)->j_trans_start_time ; | |
4054 | + jl->j_commit_bh = c_bh ; | |
4055 | + jl->j_start = SB_JOURNAL(p_s_sb)->j_start ; | |
4056 | + jl->j_len = SB_JOURNAL(p_s_sb)->j_len ; | |
4057 | + atomic_set(&jl->j_nonzerolen, SB_JOURNAL(p_s_sb)->j_len) ; | |
4058 | + atomic_set(&jl->j_commit_left, SB_JOURNAL(p_s_sb)->j_len + 2); | |
4059 | + jl->j_realblock = NULL ; | |
4060 | + | |
4061 | + /* The ENTIRE FOR LOOP MUST not cause schedule to occur. | |
4062 | + ** for each real block, add it to the journal list hash, | |
4063 | ** copy into real block index array in the commit or desc block | |
4064 | */ | |
4065 | for (i = 0, cn = SB_JOURNAL(p_s_sb)->j_first ; cn ; cn = cn->next, i++) { | |
4066 | @@ -3087,7 +3673,7 @@ | |
4067 | reiserfs_panic(p_s_sb, "journal-1676, get_cnode returned NULL\n") ; | |
4068 | } | |
4069 | if (i == 0) { | |
4070 | - SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_realblock = jl_cn ; | |
4071 | + jl->j_realblock = jl_cn ; | |
4072 | } | |
4073 | jl_cn->prev = last_cn ; | |
4074 | jl_cn->next = NULL ; | |
4075 | @@ -3105,7 +3691,7 @@ | |
4076 | jl_cn->state = 0 ; | |
4077 | jl_cn->dev = cn->bh->b_dev ; | |
4078 | jl_cn->bh = cn->bh ; | |
4079 | - jl_cn->jlist = SB_JOURNAL_LIST(p_s_sb) + SB_JOURNAL_LIST_INDEX(p_s_sb) ; | |
4080 | + jl_cn->jlist = jl; | |
4081 | insert_journal_hash(SB_JOURNAL(p_s_sb)->j_list_hash_table, jl_cn) ; | |
4082 | if (i < JOURNAL_TRANS_HALF) { | |
4083 | desc->j_realblock[i] = cpu_to_le32(cn->bh->b_blocknr) ; | |
4084 | @@ -3130,29 +3716,34 @@ | |
4085 | reiserfs_warning(p_s_sb, "journal-2020: do_journal_end: BAD desc->j_len is ZERO\n") ; | |
4086 | atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ; | |
4087 | wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; | |
4088 | - return 0 ; | |
4089 | + goto out ; | |
4090 | } | |
4091 | ||
4092 | /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ | |
4093 | cur_write_start = SB_JOURNAL(p_s_sb)->j_start ; | |
4094 | - cur_blocks_left = SB_JOURNAL(p_s_sb)->j_len ; | |
4095 | cn = SB_JOURNAL(p_s_sb)->j_first ; | |
4096 | jindex = 1 ; /* start at one so we don't get the desc again */ | |
4097 | - while(cur_blocks_left > 0) { | |
4098 | + while(cn) { | |
4099 | + clear_bit(BH_JNew, &(cn->bh->b_state)) ; | |
4100 | /* copy all the real blocks into log area. dirty log blocks */ | |
4101 | if (test_bit(BH_JDirty, &cn->bh->b_state)) { | |
4102 | struct buffer_head *tmp_bh ; | |
4103 | tmp_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + | |
4104 | ((cur_write_start + jindex) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; | |
4105 | mark_buffer_uptodate(tmp_bh, 1) ; | |
4106 | - memcpy(tmp_bh->b_data, cn->bh->b_data, cn->bh->b_size) ; | |
4107 | + memcpy(tmp_bh->b_data, bh_kmap(cn->bh), cn->bh->b_size) ; | |
4108 | + bh_kunmap(cn->bh); | |
4109 | jindex++ ; | |
4110 | + set_bit(BH_JDirty_wait, &(cn->bh->b_state)) ; | |
4111 | + clear_bit(BH_JDirty, &(cn->bh->b_state)) ; | |
4112 | } else { | |
4113 | /* JDirty cleared sometime during transaction. don't log this one */ | |
4114 | reiserfs_warning(p_s_sb, "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!\n") ; | |
4115 | + brelse(cn->bh) ; | |
4116 | } | |
4117 | - cn = cn->next ; | |
4118 | - cur_blocks_left-- ; | |
4119 | + next = cn->next ; | |
4120 | + free_cnode(p_s_sb, cn) ; | |
4121 | + cn = next ; | |
4122 | } | |
4123 | ||
4124 | /* we are done with both the c_bh and d_bh, but | |
4125 | @@ -3160,47 +3751,19 @@ | |
4126 | ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. | |
4127 | */ | |
4128 | ||
4129 | - /* now loop through and mark all buffers from this transaction as JDirty_wait | |
4130 | - ** clear the JDirty bit, clear BH_JNew too. | |
4131 | - ** if they weren't JDirty, they weren't logged, just relse them and move on | |
4132 | - */ | |
4133 | - cn = SB_JOURNAL(p_s_sb)->j_first ; | |
4134 | - while(cn) { | |
4135 | - clear_bit(BH_JNew, &(cn->bh->b_state)) ; | |
4136 | - if (test_bit(BH_JDirty, &(cn->bh->b_state))) { | |
4137 | - set_bit(BH_JDirty_wait, &(cn->bh->b_state)) ; | |
4138 | - clear_bit(BH_JDirty, &(cn->bh->b_state)) ; | |
4139 | - } else { | |
4140 | - brelse(cn->bh) ; | |
4141 | - } | |
4142 | - next = cn->next ; | |
4143 | - free_cnode(p_s_sb, cn) ; | |
4144 | - cn = next ; | |
4145 | - } | |
4146 | - | |
4147 | - /* unlock the journal list for committing and flushing */ | |
4148 | - atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 0) ; | |
4149 | - atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 0) ; | |
4150 | - | |
4151 | - orig_jindex = SB_JOURNAL_LIST_INDEX(p_s_sb) ; | |
4152 | - jindex = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ; | |
4153 | - SB_JOURNAL_LIST_INDEX(p_s_sb) = jindex ; | |
4154 | + SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb); | |
4155 | ||
4156 | - /* write any buffers that must hit disk before this commit is done */ | |
4157 | - fsync_buffers_list(&(SB_JOURNAL(p_s_sb)->j_dirty_buffers)) ; | |
4158 | + /* we lock the commit before putting it onto the main list because | |
4159 | + * we want to make sure nobody tries to run flush_commit_list until | |
4160 | + * the new transaction is fully setup, and we've already flushed the | |
4161 | + * ordered bh list | |
4162 | + */ | |
4163 | + down(&jl->j_commit_lock); | |
4164 | ||
4165 | - /* honor the flush and async wishes from the caller */ | |
4166 | - if (flush) { | |
4167 | - | |
4168 | - flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ; | |
4169 | - flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex , 1) ; | |
4170 | - } else if (commit_now) { | |
4171 | - if (wait_on_commit) { | |
4172 | - flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ; | |
4173 | - } else { | |
4174 | - commit_flush_async(p_s_sb, orig_jindex) ; | |
4175 | - } | |
4176 | - } | |
4177 | + /* now it is safe to insert this transaction on the main list */ | |
4178 | + list_add_tail(&jl->j_list, &SB_JOURNAL(p_s_sb)->j_journal_list); | |
4179 | + list_add_tail(&jl->j_working_list, &SB_JOURNAL(p_s_sb)->j_working_list); | |
4180 | + SB_JOURNAL(p_s_sb)->j_num_work_lists++; | |
4181 | ||
4182 | /* reset journal values for the next transaction */ | |
4183 | old_start = SB_JOURNAL(p_s_sb)->j_start ; | |
4184 | @@ -3212,57 +3775,119 @@ | |
4185 | SB_JOURNAL(p_s_sb)->j_len = 0 ; | |
4186 | SB_JOURNAL(p_s_sb)->j_trans_start_time = 0 ; | |
4187 | SB_JOURNAL(p_s_sb)->j_trans_id++ ; | |
4188 | + SB_JOURNAL(p_s_sb)->j_current_jl->j_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id; | |
4189 | SB_JOURNAL(p_s_sb)->j_must_wait = 0 ; | |
4190 | SB_JOURNAL(p_s_sb)->j_len_alloc = 0 ; | |
4191 | SB_JOURNAL(p_s_sb)->j_next_full_flush = 0 ; | |
4192 | SB_JOURNAL(p_s_sb)->j_next_async_flush = 0 ; | |
4193 | init_journal_hash(p_s_sb) ; | |
4194 | ||
4195 | + /* tail conversion targets have to hit the disk before we end the | |
4196 | + * transaction. Otherwise a later transaction might repack the tail | |
4197 | + * before this transaction commits, leaving the data block unflushed and | |
4198 | + * clean, if we crash before the later transaction commits, the data block | |
4199 | + * is lost. | |
4200 | + */ | |
4201 | + while(!list_empty(&jl->j_tail_bh_list)) { | |
4202 | + unlock_kernel(); | |
4203 | + fsync_buffers_list(&jl->j_tail_bh_list); | |
4204 | + lock_kernel(); | |
4205 | + } | |
4206 | + up(&jl->j_commit_lock); | |
4207 | + | |
4208 | + /* honor the flush wishes from the caller, simple commits can | |
4209 | + ** be done outside the journal lock, they are done below | |
4210 | + */ | |
4211 | + if (flush) { | |
4212 | + flush_commit_list(p_s_sb, jl, 1) ; | |
4213 | + flush_journal_list(p_s_sb, jl, 1) ; | |
4214 | + } | |
4215 | + | |
4216 | + | |
4217 | /* if the next transaction has any chance of wrapping, flush | |
4218 | ** transactions that might get overwritten. If any journal lists are very | |
4219 | ** old flush them as well. | |
4220 | */ | |
4221 | - for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { | |
4222 | - jindex = i ; | |
4223 | - if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && SB_JOURNAL(p_s_sb)->j_start <= SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) { | |
4224 | - if ((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) >= SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) { | |
4225 | - flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1) ; | |
4226 | - } | |
4227 | - } else if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && | |
4228 | - (SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { | |
4229 | - if (((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= | |
4230 | - SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) { | |
4231 | - flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1 ) ; | |
4232 | +first_jl: | |
4233 | + list_for_each_safe(entry, safe, &SB_JOURNAL(p_s_sb)->j_journal_list) { | |
4234 | + temp_jl = JOURNAL_LIST_ENTRY(entry); | |
4235 | + if (SB_JOURNAL(p_s_sb)->j_start <= temp_jl->j_start) { | |
4236 | + if ((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) >= | |
4237 | + temp_jl->j_start) | |
4238 | + { | |
4239 | + flush_used_journal_lists(p_s_sb, temp_jl); | |
4240 | + wakeup_kreiserfsd = 1; | |
4241 | + goto first_jl; | |
4242 | + } else if ((SB_JOURNAL(p_s_sb)->j_start + | |
4243 | + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) < | |
4244 | + SB_ONDISK_JOURNAL_SIZE(p_s_sb)) | |
4245 | + { | |
4246 | + /* if we don't cross into the next transaction and we don't | |
4247 | + * wrap, there is no way we can overlap any later transactions | |
4248 | + * break now | |
4249 | + */ | |
4250 | + break; | |
4251 | + } | |
4252 | + } else if ((SB_JOURNAL(p_s_sb)->j_start + | |
4253 | + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) > | |
4254 | + SB_ONDISK_JOURNAL_SIZE(p_s_sb)) | |
4255 | + { | |
4256 | + if (((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) % | |
4257 | + SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= temp_jl->j_start) | |
4258 | + { | |
4259 | + flush_used_journal_lists(p_s_sb, temp_jl); | |
4260 | + wakeup_kreiserfsd = 1; | |
4261 | + goto first_jl; | |
4262 | + } else { | |
4263 | + /* we don't overlap anything from out start to the end of the | |
4264 | + * log, and our wrapped portion doesn't overlap anything at | |
4265 | + * the start of the log. We can break | |
4266 | + */ | |
4267 | + break; | |
4268 | } | |
4269 | - } | |
4270 | - /* this check should always be run, to send old lists to disk */ | |
4271 | - if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && | |
4272 | - SB_JOURNAL_LIST(p_s_sb)[jindex].j_timestamp < | |
4273 | - (CURRENT_TIME - (SB_JOURNAL_MAX_TRANS_AGE(p_s_sb) * 4))) { | |
4274 | - flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1 ) ; | |
4275 | } | |
4276 | } | |
4277 | + flush_old_journal_lists(p_s_sb); | |
4278 | ||
4279 | - /* if the next journal_list is still in use, flush it */ | |
4280 | - if (SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_len != 0) { | |
4281 | - flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + SB_JOURNAL_LIST_INDEX(p_s_sb), 1) ; | |
4282 | - } | |
4283 | + /* soft limit */ | |
4284 | + if (SB_JOURNAL(p_s_sb)->j_num_work_lists > 128 || wakeup_kreiserfsd) { | |
4285 | + wake_up(&reiserfs_commit_thread_wait) ; | |
4286 | + } | |
4287 | ||
4288 | - /* we don't want anyone flushing the new transaction's list */ | |
4289 | - atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 1) ; | |
4290 | - atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 1) ; | |
4291 | - SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + | |
4292 | - SB_JOURNAL_LIST_INDEX(p_s_sb)) ; | |
4293 | + SB_JOURNAL(p_s_sb)->j_current_jl->j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL(p_s_sb)->j_current_jl) ; | |
4294 | ||
4295 | - if (!(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap)) { | |
4296 | + if (!(SB_JOURNAL(p_s_sb)->j_current_jl->j_list_bitmap)) { | |
4297 | reiserfs_panic(p_s_sb, "journal-1996: do_journal_end, could not get a list bitmap\n") ; | |
4298 | } | |
4299 | - unlock_journal(p_s_sb) ; | |
4300 | + | |
4301 | atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ; | |
4302 | + unlock_journal(p_s_sb) ; | |
4303 | /* wake up any body waiting to join. */ | |
4304 | + clear_bit(WRITERS_QUEUED, &SB_JOURNAL(p_s_sb)->j_state); | |
4305 | wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; | |
4306 | + | |
4307 | + if (!flush && commit_now && wait_on_commit) { | |
4308 | + if (current->need_resched) { | |
4309 | + schedule() ; | |
4310 | + } | |
4311 | + if (journal_list_still_alive(p_s_sb, commit_trans_id)) | |
4312 | + flush_commit_list(p_s_sb, jl, 1) ; | |
4313 | + } | |
4314 | + /* if we did an async commit, get kreiserfsd going on it */ | |
4315 | + if (!commit_now && !wait_on_commit) { | |
4316 | + wake_up(&reiserfs_commit_thread_wait) ; | |
4317 | + schedule(); | |
4318 | + } | |
4319 | +out: | |
4320 | + reiserfs_check_lock_depth("journal end2"); | |
4321 | + if (reiserfs_persistent_handle(th)) { | |
4322 | + memset(th, 0, sizeof(*th)); | |
4323 | + reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), p_s_sb) ; | |
4324 | + } else | |
4325 | + th->t_flags = 0 ; | |
4326 | return 0 ; | |
4327 | } | |
4328 | ||
4329 | - | |
4330 | - | |
4331 | +int __init reiserfs_journal_cache_init(void) { | |
4332 | + return 0; | |
4333 | +} | |
052932c9 AM |
4334 | diff -urN linux-2.4.22.org/fs/reiserfs/Makefile linux-2.4.22/fs/reiserfs/Makefile |
4335 | --- linux-2.4.22.org/fs/reiserfs/Makefile 2003-11-21 15:08:29.000000000 +0100 | |
4336 | +++ linux-2.4.22/fs/reiserfs/Makefile 2003-11-21 15:14:23.000000000 +0100 | |
e57e653a JR |
4337 | @@ -7,6 +7,7 @@ |
4338 | # | |
4339 | # Note 2! The CFLAGS definitions are now in the main makefile... | |
4340 | ||
4341 | +export-objs := super.o | |
4342 | O_TARGET := reiserfs.o | |
4343 | obj-y := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o super.o prints.o objectid.o \ | |
4344 | lbalance.o ibalance.o stree.o hashes.o buffer2.o tail_conversion.o journal.o resize.o item_ops.o ioctl.o procfs.o | |
052932c9 AM |
4345 | diff -urN linux-2.4.22.org/fs/reiserfs/namei.c linux-2.4.22/fs/reiserfs/namei.c |
4346 | --- linux-2.4.22.org/fs/reiserfs/namei.c 2003-11-21 15:08:29.000000000 +0100 | |
4347 | +++ linux-2.4.22/fs/reiserfs/namei.c 2003-11-21 15:14:23.000000000 +0100 | |
e57e653a JR |
4348 | @@ -7,6 +7,7 @@ |
4349 | #include <linux/bitops.h> | |
4350 | #include <linux/reiserfs_fs.h> | |
4351 | #include <linux/smp_lock.h> | |
4352 | +#include <linux/quotaops.h> | |
4353 | ||
4354 | #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } | |
4355 | #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) i->i_nlink--; | |
4356 | @@ -469,7 +470,7 @@ | |
4357 | } | |
4358 | ||
4359 | /* perform the insertion of the entry that we have prepared */ | |
4360 | - retval = reiserfs_paste_into_item (th, &path, &entry_key, buffer, paste_size); | |
4361 | + retval = reiserfs_paste_into_item (th, &path, &entry_key, dir, buffer, paste_size); | |
4362 | if (buffer != small_buf) | |
4363 | reiserfs_kfree (buffer, buflen, dir->i_sb); | |
4364 | if (retval) { | |
4365 | @@ -478,7 +479,6 @@ | |
4366 | } | |
4367 | ||
4368 | dir->i_size += paste_size; | |
4369 | - dir->i_blocks = ((dir->i_size + 511) >> 9); | |
4370 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | |
4371 | if (!S_ISDIR (inode->i_mode) && visible) | |
4372 | // reiserfs_mkdir or reiserfs_rename will do that by itself | |
4373 | @@ -494,7 +494,9 @@ | |
4374 | ** inserted into the tree yet. | |
4375 | */ | |
4376 | static int drop_new_inode(struct inode *inode) { | |
4377 | + DQUOT_DROP(inode); | |
4378 | make_bad_inode(inode) ; | |
4379 | + inode->i_flags |= S_NOQUOTA; | |
4380 | iput(inode) ; | |
4381 | return 0 ; | |
4382 | } | |
4383 | @@ -518,6 +520,11 @@ | |
4384 | } else | |
4385 | inode->i_gid = current->fsgid; | |
4386 | ||
4387 | + DQUOT_INIT(inode); | |
4388 | + if (DQUOT_ALLOC_INODE(inode)) { | |
4389 | + drop_new_inode(inode); | |
4390 | + return -EDQUOT; | |
4391 | + } | |
4392 | return 0 ; | |
4393 | } | |
4394 | ||
4395 | @@ -536,7 +543,6 @@ | |
4396 | return retval ; | |
4397 | ||
4398 | journal_begin(&th, dir->i_sb, jbegin_count) ; | |
4399 | - th.t_caller = "create" ; | |
4400 | retval = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode); | |
4401 | if (retval) { | |
4402 | goto out_failed ; | |
4403 | @@ -750,7 +756,6 @@ | |
4404 | ||
4405 | DEC_DIR_INODE_NLINK(dir) | |
4406 | dir->i_size -= (DEH_SIZE + de.de_entrylen); | |
4407 | - dir->i_blocks = ((dir->i_size + 511) >> 9); | |
4408 | reiserfs_update_sd (&th, dir); | |
4409 | ||
4410 | /* prevent empty directory from getting lost */ | |
4411 | @@ -835,7 +840,6 @@ | |
4412 | reiserfs_update_sd (&th, inode); | |
4413 | ||
4414 | dir->i_size -= (de.de_entrylen + DEH_SIZE); | |
4415 | - dir->i_blocks = ((dir->i_size + 511) >> 9); | |
4416 | dir->i_ctime = dir->i_mtime = CURRENT_TIME; | |
4417 | reiserfs_update_sd (&th, dir); | |
4418 | ||
4419 | @@ -1245,7 +1249,6 @@ | |
4420 | reiserfs_warning ((&th)->t_super, "vs-7060: reiserfs_rename: couldn't not cut old name. Fsck later?\n"); | |
4421 | ||
4422 | old_dir->i_size -= DEH_SIZE + old_de.de_entrylen; | |
4423 | - old_dir->i_blocks = ((old_dir->i_size + 511) >> 9); | |
4424 | ||
4425 | reiserfs_update_sd (&th, old_dir); | |
4426 | reiserfs_update_sd (&th, new_dir); | |
052932c9 AM |
4427 | diff -urN linux-2.4.22.org/fs/reiserfs/objectid.c linux-2.4.22/fs/reiserfs/objectid.c |
4428 | --- linux-2.4.22.org/fs/reiserfs/objectid.c 2003-11-21 15:08:29.000000000 +0100 | |
4429 | +++ linux-2.4.22/fs/reiserfs/objectid.c 2003-11-21 15:14:23.000000000 +0100 | |
e57e653a JR |
4430 | @@ -87,7 +87,6 @@ |
4431 | } | |
4432 | ||
4433 | journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s)); | |
4434 | - s->s_dirt = 1; | |
4435 | return unused_objectid; | |
4436 | } | |
4437 | ||
4438 | @@ -106,8 +105,6 @@ | |
4439 | ||
4440 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; | |
4441 | journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s)); | |
4442 | - s->s_dirt = 1; | |
4443 | - | |
4444 | ||
4445 | /* start at the beginning of the objectid map (i = 0) and go to | |
4446 | the end of it (i = disk_sb->s_oid_cursize). Linear search is | |
052932c9 AM |
4447 | diff -urN linux-2.4.22.org/fs/reiserfs/procfs.c linux-2.4.22/fs/reiserfs/procfs.c |
4448 | --- linux-2.4.22.org/fs/reiserfs/procfs.c 2003-11-21 15:08:29.000000000 +0100 | |
4449 | +++ linux-2.4.22/fs/reiserfs/procfs.c 2003-11-21 15:14:24.000000000 +0100 | |
e57e653a JR |
4450 | @@ -497,7 +497,6 @@ |
4451 | "j_first_unflushed_offset: \t%lu\n" | |
4452 | "j_last_flush_trans_id: \t%lu\n" | |
4453 | "j_trans_start_time: \t%li\n" | |
4454 | - "j_journal_list_index: \t%i\n" | |
4455 | "j_list_bitmap_index: \t%i\n" | |
4456 | "j_must_wait: \t%i\n" | |
4457 | "j_next_full_flush: \t%i\n" | |
4458 | @@ -543,7 +542,6 @@ | |
4459 | JF( j_first_unflushed_offset ), | |
4460 | JF( j_last_flush_trans_id ), | |
4461 | JF( j_trans_start_time ), | |
4462 | - JF( j_journal_list_index ), | |
4463 | JF( j_list_bitmap_index ), | |
4464 | JF( j_must_wait ), | |
4465 | JF( j_next_full_flush ), | |
052932c9 AM |
4466 | diff -urN linux-2.4.22.org/fs/reiserfs/stree.c linux-2.4.22/fs/reiserfs/stree.c |
4467 | --- linux-2.4.22.org/fs/reiserfs/stree.c 2003-11-21 15:08:29.000000000 +0100 | |
4468 | +++ linux-2.4.22/fs/reiserfs/stree.c 2003-11-21 15:14:25.000000000 +0100 | |
e57e653a JR |
4469 | @@ -60,6 +60,7 @@ |
4470 | #include <linux/pagemap.h> | |
4471 | #include <linux/reiserfs_fs.h> | |
4472 | #include <linux/smp_lock.h> | |
4473 | +#include <linux/quotaops.h> | |
4474 | ||
4475 | /* Does the buffer contain a disk block which is in the tree. */ | |
4476 | inline int B_IS_IN_TREE (const struct buffer_head * p_s_bh) | |
4477 | @@ -71,9 +72,6 @@ | |
4478 | return ( B_LEVEL (p_s_bh) != FREE_LEVEL ); | |
4479 | } | |
4480 | ||
4481 | - | |
4482 | - | |
4483 | - | |
4484 | inline void copy_short_key (void * to, const void * from) | |
4485 | { | |
4486 | memcpy (to, from, SHORT_KEY_SIZE); | |
4487 | @@ -652,9 +650,9 @@ | |
4488 | stop at leaf level - set to | |
4489 | DISK_LEAF_NODE_LEVEL */ | |
4490 | ) { | |
4491 | - int n_block_number = SB_ROOT_BLOCK (p_s_sb), | |
4492 | - expected_level = SB_TREE_HEIGHT (p_s_sb), | |
4493 | - n_block_size = p_s_sb->s_blocksize; | |
4494 | + int n_block_number, | |
4495 | + expected_level, | |
4496 | + n_block_size = p_s_sb->s_blocksize; | |
4497 | struct buffer_head * p_s_bh; | |
4498 | struct path_element * p_s_last_element; | |
4499 | int n_node_level, n_retval; | |
4500 | @@ -678,8 +676,11 @@ | |
4501 | /* With each iteration of this loop we search through the items in the | |
4502 | current node, and calculate the next current node(next path element) | |
4503 | for the next iteration of this loop.. */ | |
4504 | + n_block_number = SB_ROOT_BLOCK (p_s_sb); | |
4505 | + expected_level = SB_TREE_HEIGHT (p_s_sb); | |
4506 | while ( 1 ) { | |
4507 | ||
4508 | + reiserfs_check_lock_depth("search_by_key"); | |
4509 | #ifdef CONFIG_REISERFS_CHECK | |
4510 | if ( !(++n_repeat_counter % 50000) ) | |
4511 | reiserfs_warning (p_s_sb, "PAP-5100: search_by_key: %s:" | |
4512 | @@ -1123,8 +1124,7 @@ | |
4513 | tmp = get_block_num(p_n_unfm_pointer,0); | |
4514 | put_block_num(p_n_unfm_pointer, 0, 0); | |
4515 | journal_mark_dirty (th, p_s_sb, p_s_bh); | |
4516 | - inode->i_blocks -= p_s_sb->s_blocksize / 512; | |
4517 | - reiserfs_free_block(th, tmp); | |
4518 | + reiserfs_free_block(th, inode, tmp, 1); | |
4519 | /* In case of big fragmentation it is possible that each block | |
4520 | freed will cause dirtying of one more bitmap and then we will | |
4521 | quickly overflow our transaction space. This is a | |
4522 | @@ -1132,9 +1132,7 @@ | |
4523 | if (journal_transaction_should_end(th, th->t_blocks_allocated)) { | |
4524 | int orig_len_alloc = th->t_blocks_allocated ; | |
4525 | pathrelse(p_s_path) ; | |
4526 | - | |
4527 | - journal_end(th, p_s_sb, orig_len_alloc) ; | |
4528 | - journal_begin(th, p_s_sb, orig_len_alloc) ; | |
4529 | + reiserfs_restart_transaction(th, orig_len_alloc); | |
4530 | reiserfs_update_inode_transaction(inode) ; | |
4531 | need_research = 1; | |
4532 | break; | |
4533 | @@ -1168,8 +1166,7 @@ | |
4534 | } | |
4535 | } | |
4536 | ||
4537 | - | |
4538 | -/* Calculate bytes number which will be deleted or cutted in the balance. */ | |
4539 | +/* Calculate number of bytes which will be deleted or cut during balance */ | |
4540 | int calc_deleted_bytes_number( | |
4541 | struct tree_balance * p_s_tb, | |
4542 | char c_mode | |
4543 | @@ -1180,14 +1177,14 @@ | |
4544 | if ( is_statdata_le_ih (p_le_ih) ) | |
4545 | return 0; | |
4546 | ||
4547 | + n_del_size = ( c_mode == M_DELETE ) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0]; | |
4548 | if ( is_direntry_le_ih (p_le_ih) ) { | |
4549 | // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */ | |
4550 | // we can't use EMPTY_DIR_SIZE, as old format dirs have a different | |
4551 | // empty size. ick. FIXME, is this right? | |
4552 | // | |
4553 | - return ih_item_len(p_le_ih); | |
4554 | + return n_del_size ; | |
4555 | } | |
4556 | - n_del_size = ( c_mode == M_DELETE ) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0]; | |
4557 | ||
4558 | if ( is_indirect_le_ih (p_le_ih) ) | |
4559 | n_del_size = (n_del_size/UNFM_P_SIZE)* | |
4560 | @@ -1221,17 +1218,46 @@ | |
4561 | item [--i] = 0; | |
4562 | } | |
4563 | ||
4564 | +#ifdef REISERQUOTA_DEBUG | |
4565 | +char key2type(struct key *ih) | |
4566 | +{ | |
4567 | + if (is_direntry_le_key(2, ih)) | |
4568 | + return 'd'; | |
4569 | + if (is_direct_le_key(2, ih)) | |
4570 | + return 'D'; | |
4571 | + if (is_indirect_le_key(2, ih)) | |
4572 | + return 'i'; | |
4573 | + if (is_statdata_le_key(2, ih)) | |
4574 | + return 's'; | |
4575 | + return 'u'; | |
4576 | +} | |
4577 | + | |
4578 | +char head2type(struct item_head *ih) | |
4579 | +{ | |
4580 | + if (is_direntry_le_ih(ih)) | |
4581 | + return 'd'; | |
4582 | + if (is_direct_le_ih(ih)) | |
4583 | + return 'D'; | |
4584 | + if (is_indirect_le_ih(ih)) | |
4585 | + return 'i'; | |
4586 | + if (is_statdata_le_ih(ih)) | |
4587 | + return 's'; | |
4588 | + return 'u'; | |
4589 | +} | |
4590 | +#endif | |
4591 | ||
4592 | /* Delete object item. */ | |
4593 | int reiserfs_delete_item (struct reiserfs_transaction_handle *th, | |
4594 | struct path * p_s_path, /* Path to the deleted item. */ | |
4595 | const struct cpu_key * p_s_item_key, /* Key to search for the deleted item. */ | |
4596 | - struct inode * p_s_inode,/* inode is here just to update i_blocks */ | |
4597 | + struct inode * p_s_inode,/* inode is here just to update i_blocks and quotas */ | |
4598 | struct buffer_head * p_s_un_bh) /* NULL or unformatted node pointer. */ | |
4599 | { | |
4600 | struct super_block * p_s_sb = p_s_inode->i_sb; | |
4601 | struct tree_balance s_del_balance; | |
4602 | struct item_head s_ih; | |
4603 | + struct item_head *q_ih; | |
4604 | + int quota_cut_bytes; | |
4605 | int n_ret_value, | |
4606 | n_del_size, | |
4607 | n_removed; | |
4608 | @@ -1281,6 +1307,22 @@ | |
4609 | ||
4610 | // reiserfs_delete_item returns item length when success | |
4611 | n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); | |
4612 | + q_ih = get_ih(p_s_path) ; | |
4613 | + quota_cut_bytes = ih_item_len(q_ih) ; | |
4614 | + | |
4615 | + /* hack so the quota code doesn't have to guess if the file | |
4616 | + ** has a tail. On tail insert, we allocate quota for 1 unformatted node. | |
4617 | + ** We test the offset because the tail might have been | |
4618 | + ** split into multiple items, and we only want to decrement for | |
4619 | + ** the unfm node once | |
4620 | + */ | |
4621 | + if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(q_ih)) { | |
4622 | + if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) { | |
4623 | + quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE; | |
4624 | + } else { | |
4625 | + quota_cut_bytes = 0 ; | |
4626 | + } | |
4627 | + } | |
4628 | ||
4629 | if ( p_s_un_bh ) { | |
4630 | int off; | |
4631 | @@ -1312,10 +1354,14 @@ | |
4632 | memcpy(data + off, | |
4633 | B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), n_ret_value); | |
4634 | } | |
4635 | - | |
4636 | /* Perform balancing after all resources have been collected at once. */ | |
4637 | do_balance(&s_del_balance, NULL, NULL, M_DELETE); | |
4638 | ||
4639 | +#ifdef REISERQUOTA_DEBUG | |
4640 | + printk(KERN_DEBUG "reiserquota delete_item(): freeing %u, id=%u type=%c\n", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); | |
4641 | +#endif | |
4642 | + DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); | |
4643 | + | |
4644 | /* Return deleted body length */ | |
4645 | return n_ret_value; | |
4646 | } | |
4647 | @@ -1340,14 +1386,16 @@ | |
4648 | ||
4649 | /* this deletes item which never gets split */ | |
4650 | void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th, | |
4651 | + struct inode *inode, | |
4652 | struct key * key) | |
4653 | { | |
4654 | struct tree_balance tb; | |
4655 | INITIALIZE_PATH (path); | |
4656 | - int item_len; | |
4657 | + int item_len = 0; | |
4658 | int tb_init = 0 ; | |
4659 | struct cpu_key cpu_key; | |
4660 | int retval; | |
4661 | + int quota_cut_bytes = 0; | |
4662 | ||
4663 | le_key2cpu_key (&cpu_key, key); | |
4664 | ||
4665 | @@ -1371,6 +1419,7 @@ | |
4666 | item_len = ih_item_len( PATH_PITEM_HEAD(&path) ); | |
4667 | init_tb_struct (th, &tb, th->t_super, &path, - (IH_SIZE + item_len)); | |
4668 | } | |
4669 | + quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)) ; | |
4670 | ||
4671 | retval = fix_nodes (M_DELETE, &tb, NULL, 0); | |
4672 | if (retval == REPEAT_SEARCH) { | |
4673 | @@ -1380,6 +1429,12 @@ | |
4674 | ||
4675 | if (retval == CARRY_ON) { | |
4676 | do_balance (&tb, 0, 0, M_DELETE); | |
4677 | + if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */ | |
4678 | +#ifdef REISERQUOTA_DEBUG | |
4679 | + printk(KERN_DEBUG "reiserquota delete_solid_item(): freeing %u id=%u type=%c\n", quota_cut_bytes, inode->i_uid, key2type(key)); | |
4680 | +#endif | |
4681 | + DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes); | |
4682 | + } | |
4683 | break; | |
4684 | } | |
4685 | ||
4686 | @@ -1412,7 +1467,7 @@ | |
4687 | } | |
4688 | /* USE_INODE_GENERATION_COUNTER */ | |
4689 | #endif | |
4690 | - reiserfs_delete_solid_item (th, INODE_PKEY (inode)); | |
4691 | + reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode)); | |
4692 | } | |
4693 | ||
4694 | ||
4695 | @@ -1484,6 +1539,38 @@ | |
4696 | mark_inode_dirty (inode); | |
4697 | } | |
4698 | ||
4699 | +static void | |
4700 | +unmap_buffers(struct page *page, loff_t pos) { | |
4701 | + struct buffer_head *bh ; | |
4702 | + struct buffer_head *head ; | |
4703 | + struct buffer_head *next ; | |
4704 | + unsigned long tail_index ; | |
4705 | + unsigned long cur_index ; | |
4706 | + | |
4707 | + if (!page || !page->buffers) | |
4708 | + return; | |
4709 | + | |
4710 | + tail_index = pos & (PAGE_CACHE_SIZE - 1) ; | |
4711 | + cur_index = 0 ; | |
4712 | + head = page->buffers ; | |
4713 | + bh = head ; | |
4714 | + do { | |
4715 | + next = bh->b_this_page ; | |
4716 | + | |
4717 | + /* we want to unmap the buffers that contain the tail, and | |
4718 | + ** all the buffers after it (since the tail must be at the | |
4719 | + ** end of the file). We don't want to unmap file data | |
4720 | + ** before the tail, since it might be dirty and waiting to | |
4721 | + ** reach disk | |
4722 | + */ | |
4723 | + cur_index += bh->b_size ; | |
4724 | + if (cur_index > tail_index) { | |
4725 | + reiserfs_unmap_buffer(bh) ; | |
4726 | + } | |
4727 | + bh = next ; | |
4728 | + } while (bh != head) ; | |
4729 | +} | |
4730 | + | |
4731 | ||
4732 | /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ | |
4733 | int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, | |
4734 | @@ -1499,12 +1586,15 @@ | |
4735 | structure by using the init_tb_struct and fix_nodes functions. | |
4736 | After that we can make tree balancing. */ | |
4737 | struct tree_balance s_cut_balance; | |
4738 | + struct item_head *p_le_ih; | |
4739 | + loff_t tail_pos = 0; | |
4740 | int n_cut_size = 0, /* Amount to be cut. */ | |
4741 | n_ret_value = CARRY_ON, | |
4742 | n_removed = 0, /* Number of the removed unformatted nodes. */ | |
4743 | n_is_inode_locked = 0; | |
4744 | char c_mode; /* Mode of the balance. */ | |
4745 | int retval2 = -1; | |
4746 | + int quota_cut_bytes; | |
4747 | ||
4748 | ||
4749 | init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, n_cut_size); | |
4750 | @@ -1531,6 +1621,9 @@ | |
4751 | /* tail has been left in the unformatted node */ | |
4752 | return n_ret_value; | |
4753 | ||
4754 | + if (n_is_inode_locked) { | |
4755 | +printk("inode locked twice\n"); | |
4756 | + } | |
4757 | n_is_inode_locked = 1; | |
4758 | ||
4759 | /* removing of last unformatted node will change value we | |
4760 | @@ -1545,6 +1638,7 @@ | |
4761 | set_cpu_key_k_type (p_s_item_key, TYPE_INDIRECT); | |
4762 | p_s_item_key->key_length = 4; | |
4763 | n_new_file_size -= (n_new_file_size & (p_s_sb->s_blocksize - 1)); | |
4764 | + tail_pos = n_new_file_size; | |
4765 | set_cpu_key_k_offset (p_s_item_key, n_new_file_size + 1); | |
4766 | if ( search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ){ | |
4767 | print_block (PATH_PLAST_BUFFER (p_s_path), 3, PATH_LAST_POSITION (p_s_path) - 1, PATH_LAST_POSITION (p_s_path) + 1); | |
4768 | @@ -1592,23 +1686,27 @@ | |
4769 | RFALSE( c_mode == M_PASTE || c_mode == M_INSERT, "illegal mode"); | |
4770 | ||
4771 | /* Calculate number of bytes that need to be cut from the item. */ | |
4772 | + quota_cut_bytes = ( c_mode == M_DELETE ) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.insert_size[0]; | |
4773 | if (retval2 == -1) | |
4774 | n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode); | |
4775 | else | |
4776 | n_ret_value = retval2; | |
4777 | - | |
4778 | - if ( c_mode == M_DELETE ) { | |
4779 | - struct item_head * p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); | |
4780 | - | |
4781 | - if ( is_direct_le_ih (p_le_ih) && (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) { | |
4782 | - /* we delete first part of tail which was stored in direct | |
4783 | - item(s) */ | |
4784 | + | |
4785 | + | |
4786 | + /* For direct items, we only change the quota when deleting the last | |
4787 | + ** item. | |
4788 | + */ | |
4789 | + p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); | |
4790 | + if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) { | |
4791 | + if (c_mode == M_DELETE && | |
4792 | + (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) { | |
4793 | // FIXME: this is to keep 3.5 happy | |
4794 | p_s_inode->u.reiserfs_i.i_first_direct_byte = U32_MAX; | |
4795 | - p_s_inode->i_blocks -= p_s_sb->s_blocksize / 512; | |
4796 | + quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE ; | |
4797 | + } else { | |
4798 | + quota_cut_bytes = 0 ; | |
4799 | } | |
4800 | } | |
4801 | - | |
4802 | #ifdef CONFIG_REISERFS_CHECK | |
4803 | if (n_is_inode_locked) { | |
4804 | struct item_head * le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); | |
4805 | @@ -1642,7 +1740,12 @@ | |
4806 | ** deal with it here. | |
4807 | */ | |
4808 | p_s_inode->u.reiserfs_i.i_flags &= ~i_pack_on_close_mask; | |
4809 | + unmap_buffers(page, tail_pos); | |
4810 | } | |
4811 | +#ifdef REISERQUOTA_DEBUG | |
4812 | + printk(KERN_DEBUG "reiserquota cut_from_item(): freeing %u id=%u type=%c\n", quota_cut_bytes, p_s_inode->i_uid, '?'); | |
4813 | +#endif | |
4814 | + DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); | |
4815 | return n_ret_value; | |
4816 | } | |
4817 | ||
4818 | @@ -1654,8 +1757,8 @@ | |
4819 | ||
4820 | set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), DOT_OFFSET); | |
4821 | set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_DIRENTRY); | |
4822 | - reiserfs_delete_solid_item (th, INODE_PKEY (inode)); | |
4823 | - | |
4824 | + reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode)); | |
4825 | + reiserfs_update_sd(th, inode) ; | |
4826 | set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), SD_OFFSET); | |
4827 | set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_STAT_DATA); | |
4828 | } | |
4829 | @@ -1681,6 +1784,7 @@ | |
4830 | n_new_file_size;/* New file size. */ | |
4831 | int n_deleted; /* Number of deleted or truncated bytes. */ | |
4832 | int retval; | |
4833 | + int jbegin_count = th->t_blocks_allocated; | |
4834 | ||
4835 | if ( ! (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) || S_ISLNK(p_s_inode->i_mode)) ) | |
4836 | return; | |
4837 | @@ -1760,17 +1864,14 @@ | |
4838 | ** sure the file is consistent before ending the current trans | |
4839 | ** and starting a new one | |
4840 | */ | |
4841 | - if (journal_transaction_should_end(th, th->t_blocks_allocated)) { | |
4842 | - int orig_len_alloc = th->t_blocks_allocated ; | |
4843 | + if (journal_transaction_should_end(th, jbegin_count)) { | |
4844 | decrement_counters_in_path(&s_search_path) ; | |
4845 | ||
4846 | if (update_timestamps) { | |
4847 | p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME; | |
4848 | } | |
4849 | reiserfs_update_sd(th, p_s_inode) ; | |
4850 | - | |
4851 | - journal_end(th, p_s_inode->i_sb, orig_len_alloc) ; | |
4852 | - journal_begin(th, p_s_inode->i_sb, orig_len_alloc) ; | |
4853 | + reiserfs_restart_transaction(th, jbegin_count) ; | |
4854 | reiserfs_update_inode_transaction(p_s_inode) ; | |
4855 | } | |
4856 | } while ( n_file_size > ROUND_UP (n_new_file_size) && | |
4857 | @@ -1822,18 +1923,37 @@ | |
4858 | int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, | |
4859 | struct path * p_s_search_path, /* Path to the pasted item. */ | |
4860 | const struct cpu_key * p_s_key, /* Key to search for the needed item.*/ | |
4861 | + struct inode * inode, /* Inode item belongs to */ | |
4862 | const char * p_c_body, /* Pointer to the bytes to paste. */ | |
4863 | int n_pasted_size) /* Size of pasted bytes. */ | |
4864 | { | |
4865 | struct tree_balance s_paste_balance; | |
4866 | int retval; | |
4867 | + int fs_gen; | |
4868 | + | |
4869 | + fs_gen = get_generation(inode->i_sb) ; | |
4870 | + | |
4871 | +#ifdef REISERQUOTA_DEBUG | |
4872 | + printk(KERN_DEBUG "reiserquota paste_into_item(): allocating %u id=%u type=%c\n", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key))); | |
4873 | +#endif | |
4874 | ||
4875 | + if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) { | |
4876 | + pathrelse(p_s_search_path); | |
4877 | + return -EDQUOT; | |
4878 | + } | |
4879 | init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size); | |
4880 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES | |
4881 | s_paste_balance.key = p_s_key->on_disk_key; | |
4882 | #endif | |
4883 | - | |
4884 | - while ( (retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) == REPEAT_SEARCH ) { | |
4885 | + | |
4886 | + /* DQUOT_* can schedule, must check before the fix_nodes */ | |
4887 | + if (fs_changed(fs_gen, inode->i_sb)) { | |
4888 | + goto search_again; | |
4889 | + } | |
4890 | + | |
4891 | + while ((retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) == | |
4892 | +REPEAT_SEARCH ) { | |
4893 | +search_again: | |
4894 | /* file system changed while we were in the fix_nodes */ | |
4895 | PROC_INFO_INC( th -> t_super, paste_into_item_restarted ); | |
4896 | retval = search_for_position_by_key (th->t_super, p_s_key, p_s_search_path); | |
4897 | @@ -1862,6 +1982,10 @@ | |
4898 | error_out: | |
4899 | /* this also releases the path */ | |
4900 | unfix_nodes(&s_paste_balance); | |
4901 | +#ifdef REISERQUOTA_DEBUG | |
4902 | + printk(KERN_DEBUG "reiserquota paste_into_item(): freeing %u id=%u type=%c\n", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key))); | |
4903 | +#endif | |
4904 | + DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); | |
4905 | return retval ; | |
4906 | } | |
4907 | ||
4908 | @@ -1871,23 +1995,45 @@ | |
4909 | struct path * p_s_path, /* Path to the inserteded item. */ | |
4910 | const struct cpu_key * key, | |
4911 | struct item_head * p_s_ih, /* Pointer to the item header to insert.*/ | |
4912 | + struct inode * inode, | |
4913 | const char * p_c_body) /* Pointer to the bytes to insert. */ | |
4914 | { | |
4915 | struct tree_balance s_ins_balance; | |
4916 | int retval; | |
4917 | + int fs_gen = 0 ; | |
4918 | + int quota_bytes = 0 ; | |
4919 | ||
4920 | + if (inode) { /* Do we count quotas for item? */ | |
4921 | + fs_gen = get_generation(inode->i_sb); | |
4922 | + quota_bytes = ih_item_len(p_s_ih); | |
4923 | + | |
4924 | + /* hack so the quota code doesn't have to guess if the file has | |
4925 | + ** a tail, links are always tails, so there's no guessing needed | |
4926 | + */ | |
4927 | + if (!S_ISLNK (inode->i_mode) && is_direct_le_ih(p_s_ih)) { | |
4928 | + quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE ; | |
4929 | + } | |
4930 | +#ifdef REISERQUOTA_DEBUG | |
4931 | + printk(KERN_DEBUG "reiserquota insert_item(): allocating %u id=%u type=%c\n", quota_bytes, inode->i_uid, head2type(p_s_ih)); | |
4932 | +#endif | |
4933 | + /* We can't dirty inode here. It would be immediately written but | |
4934 | + * appropriate stat item isn't inserted yet... */ | |
4935 | + if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) { | |
4936 | + pathrelse(p_s_path); | |
4937 | + return -EDQUOT; | |
4938 | + } | |
4939 | + } | |
4940 | init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih)); | |
4941 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES | |
4942 | s_ins_balance.key = key->on_disk_key; | |
4943 | #endif | |
4944 | - | |
4945 | - /* | |
4946 | - if (p_c_body == 0) | |
4947 | - n_zeros_num = ih_item_len(p_s_ih); | |
4948 | - */ | |
4949 | - // le_key2cpu_key (&key, &(p_s_ih->ih_key)); | |
4950 | + /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */ | |
4951 | + if (inode && fs_changed(fs_gen, inode->i_sb)) { | |
4952 | + goto search_again; | |
4953 | + } | |
4954 | ||
4955 | while ( (retval = fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, p_c_body)) == REPEAT_SEARCH) { | |
4956 | +search_again: | |
4957 | /* file system changed while we were in the fix_nodes */ | |
4958 | PROC_INFO_INC( th -> t_super, insert_item_restarted ); | |
4959 | retval = search_item (th->t_super, key, p_s_path); | |
4960 | @@ -1902,7 +2048,7 @@ | |
4961 | goto error_out; | |
4962 | } | |
4963 | } | |
4964 | - | |
4965 | + | |
4966 | /* make balancing after all resources will be collected at a time */ | |
4967 | if ( retval == CARRY_ON ) { | |
4968 | do_balance (&s_ins_balance, p_s_ih, p_c_body, M_INSERT); | |
4969 | @@ -1913,6 +2059,11 @@ | |
4970 | error_out: | |
4971 | /* also releases the path */ | |
4972 | unfix_nodes(&s_ins_balance); | |
4973 | +#ifdef REISERQUOTA_DEBUG | |
4974 | + printk(KERN_DEBUG "reiserquota insert_item(): freeing %u id=%u type=%c\n", quota_bytes, inode->i_uid, head2type(p_s_ih)); | |
4975 | +#endif | |
4976 | + if (inode) | |
4977 | + DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes) ; | |
4978 | return retval; | |
4979 | } | |
4980 | ||
052932c9 AM |
4981 | diff -urN linux-2.4.22.org/fs/reiserfs/super.c linux-2.4.22/fs/reiserfs/super.c |
4982 | --- linux-2.4.22.org/fs/reiserfs/super.c 2003-11-21 15:08:29.000000000 +0100 | |
4983 | +++ linux-2.4.22/fs/reiserfs/super.c 2003-11-21 15:14:25.000000000 +0100 | |
e57e653a JR |
4984 | @@ -13,6 +13,9 @@ |
4985 | #include <linux/locks.h> | |
4986 | #include <linux/init.h> | |
4987 | ||
4988 | +EXPORT_SYMBOL(journal_begin) ; | |
4989 | +EXPORT_SYMBOL(journal_end) ; | |
4990 | + | |
4991 | #define REISERFS_OLD_BLOCKSIZE 4096 | |
4992 | #define REISERFS_SUPER_MAGIC_STRING_OFFSET_NJ 20 | |
4993 | ||
4994 | @@ -50,22 +53,28 @@ | |
4995 | static int reiserfs_remount (struct super_block * s, int * flags, char * data); | |
4996 | static int reiserfs_statfs (struct super_block * s, struct statfs * buf); | |
4997 | ||
4998 | -static void reiserfs_write_super (struct super_block * s) | |
4999 | +static int reiserfs_sync_fs (struct super_block * s) | |
5000 | { | |
5001 | + struct reiserfs_transaction_handle th; | |
5002 | + lock_kernel() ; | |
5003 | + if (!(s->s_flags & MS_RDONLY)) { | |
5004 | + journal_begin(&th, s, 1); | |
5005 | + journal_end_sync(&th, s, 1); | |
5006 | + s->s_dirt = 0; | |
5007 | + } | |
5008 | + unlock_kernel() ; | |
5009 | + return 0; | |
5010 | +} | |
5011 | ||
5012 | - int dirty = 0 ; | |
5013 | - lock_kernel() ; | |
5014 | - if (!(s->s_flags & MS_RDONLY)) { | |
5015 | - dirty = flush_old_commits(s, 1) ; | |
5016 | - } | |
5017 | - s->s_dirt = dirty; | |
5018 | - unlock_kernel() ; | |
5019 | +static void reiserfs_write_super (struct super_block * s) | |
5020 | +{ | |
5021 | + reiserfs_sync_fs(s); | |
5022 | } | |
5023 | ||
5024 | + | |
5025 | static void reiserfs_write_super_lockfs (struct super_block * s) | |
5026 | { | |
5027 | ||
5028 | - int dirty = 0 ; | |
5029 | struct reiserfs_transaction_handle th ; | |
5030 | lock_kernel() ; | |
5031 | if (!(s->s_flags & MS_RDONLY)) { | |
5032 | @@ -75,7 +84,7 @@ | |
5033 | reiserfs_block_writes(&th) ; | |
052932c9 | 5034 | journal_end(&th, s, 1) ; |
e57e653a JR |
5035 | } |
5036 | - s->s_dirt = dirty; | |
5037 | + s->s_dirt = 0; | |
5038 | unlock_kernel() ; | |
5039 | } | |
5040 | ||
5041 | @@ -100,7 +109,7 @@ | |
5042 | /* we are going to do one balancing */ | |
5043 | journal_begin (&th, s, JOURNAL_PER_BALANCE_CNT); | |
5044 | ||
5045 | - reiserfs_delete_solid_item (&th, key); | |
5046 | + reiserfs_delete_solid_item (&th, NULL, key); | |
5047 | if (oid_free) | |
5048 | /* removals are protected by direct items */ | |
5049 | reiserfs_release_objectid (&th, le32_to_cpu (key->k_objectid)); | |
5050 | @@ -286,8 +295,8 @@ | |
5051 | /* body of "save" link */ | |
5052 | link = INODE_PKEY (inode)->k_dir_id; | |
5053 | ||
5054 | - /* put "save" link inot tree */ | |
5055 | - retval = reiserfs_insert_item (th, &path, &key, &ih, (char *)&link); | |
5056 | + /* put "save" link inot tree, don't charge quota to anyone */ | |
5057 | + retval = reiserfs_insert_item (th, &path, &key, &ih, NULL, (char *)&link); | |
5058 | if (retval) { | |
5059 | if (retval != -ENOSPC) | |
5060 | reiserfs_warning (inode->i_sb, "vs-2120: add_save_link: insert_item returned %d\n", | |
5061 | @@ -329,7 +338,8 @@ | |
5062 | ( inode -> u.reiserfs_i.i_flags & i_link_saved_truncate_mask ) ) || | |
5063 | ( !truncate && | |
5064 | ( inode -> u.reiserfs_i.i_flags & i_link_saved_unlink_mask ) ) ) | |
5065 | - reiserfs_delete_solid_item (&th, &key); | |
5066 | + /* don't take quota bytes from anywhere */ | |
5067 | + reiserfs_delete_solid_item (&th, NULL, &key); | |
5068 | if (!truncate) { | |
5069 | reiserfs_release_objectid (&th, inode->i_ino); | |
5070 | inode -> u.reiserfs_i.i_flags &= ~i_link_saved_unlink_mask; | |
5071 | @@ -357,6 +367,7 @@ | |
5072 | ** to do a journal_end | |
5073 | */ | |
5074 | journal_release(&th, s) ; | |
5075 | + s->s_dirt = 0; | |
5076 | ||
5077 | for (i = 0; i < SB_BMAP_NR (s); i ++) | |
5078 | brelse (SB_AP_BITMAP (s)[i].bh); | |
5079 | @@ -418,6 +429,7 @@ | |
5080 | put_super: reiserfs_put_super, | |
5081 | write_super: reiserfs_write_super, | |
5082 | write_super_lockfs: reiserfs_write_super_lockfs, | |
5083 | + sync_fs: reiserfs_sync_fs, | |
5084 | unlockfs: reiserfs_unlockfs, | |
5085 | statfs: reiserfs_statfs, | |
5086 | remount_fs: reiserfs_remount, | |
5087 | @@ -463,6 +475,14 @@ | |
5088 | {NULL, 0, 0} | |
5089 | }; | |
5090 | ||
5091 | +/* possible values for -o data= */ | |
5092 | +static const arg_desc_t logging_mode[] = { | |
5093 | + {"ordered", 1<<REISERFS_DATA_ORDERED, (1<<REISERFS_DATA_LOG|1<<REISERFS_DATA_WRITEBACK)}, | |
5094 | + {"journal", 1<<REISERFS_DATA_LOG, (1<<REISERFS_DATA_ORDERED|1<<REISERFS_DATA_WRITEBACK)}, | |
5095 | + {"writeback", 1<<REISERFS_DATA_WRITEBACK, (1<<REISERFS_DATA_ORDERED|1<<REISERFS_DATA_LOG)}, | |
5096 | + {NULL, 0} | |
5097 | +}; | |
5098 | + | |
5099 | ||
5100 | /* possible values for "-o block-allocator=" and bits which are to be set in | |
5101 | s_mount_opt of reiserfs specific part of in-core super block */ | |
5102 | @@ -612,10 +632,14 @@ | |
5103 | ||
5104 | {"block-allocator", 'a', balloc, 0, 0}, | |
5105 | {"hash", 'h', hash, 1<<FORCE_HASH_DETECT, 0}, | |
5106 | + {"data", 'd', logging_mode, 0, 0}, | |
5107 | ||
5108 | {"resize", 'r', 0, 0, 0}, | |
5109 | {"attrs", 0, 0, 1<<REISERFS_ATTRS, 0}, | |
5110 | {"noattrs", 0, 0, 0, 1<<REISERFS_ATTRS}, | |
5111 | + {"usrquota", 0, 0, 0, 0}, | |
5112 | + {"grpquota", 0, 0, 0, 0}, | |
5113 | + | |
5114 | {NULL, 0, 0, 0, 0} | |
5115 | }; | |
5116 | ||
5117 | @@ -672,6 +696,47 @@ | |
5118 | } | |
5119 | } | |
5120 | ||
5121 | +static void switch_data_mode(struct super_block *s, unsigned long mode) { | |
5122 | + struct reiserfs_transaction_handle th; | |
5123 | + int sync_all = !reiserfs_data_log(s); | |
5124 | + | |
5125 | + journal_begin(&th, s, 1); | |
5126 | + SB_JOURNAL(s)->j_must_wait = 1; | |
5127 | + journal_end_sync(&th, s, 1); | |
5128 | + | |
5129 | + s->u.reiserfs_sb.s_mount_opt &= ~((1 << REISERFS_DATA_LOG) | | |
5130 | + (1 << REISERFS_DATA_ORDERED) | | |
5131 | + (1 << REISERFS_DATA_WRITEBACK)); | |
5132 | + s->u.reiserfs_sb.s_mount_opt |= (1 << mode); | |
5133 | + | |
5134 | + journal_begin(&th, s, 1); | |
5135 | + SB_JOURNAL(s)->j_must_wait = 1; | |
5136 | + journal_end_sync(&th, s, 1); | |
5137 | + | |
5138 | + if (sync_all) | |
5139 | + fsync_no_super(s->s_dev); | |
5140 | +} | |
5141 | + | |
5142 | +static void handle_data_mode(struct super_block *s, unsigned long mount_options) | |
5143 | +{ | |
5144 | + if (mount_options & (1 << REISERFS_DATA_LOG)) { | |
5145 | + if (!reiserfs_data_log(s)) { | |
5146 | + switch_data_mode(s, REISERFS_DATA_LOG); | |
5147 | + printk("reiserfs: switching to journaled data mode\n"); | |
5148 | + } | |
5149 | + } else if (mount_options & (1 << REISERFS_DATA_ORDERED)) { | |
5150 | + if (!reiserfs_data_ordered(s)) { | |
5151 | + switch_data_mode(s, REISERFS_DATA_ORDERED); | |
5152 | + printk("reiserfs: switching to ordered data mode\n"); | |
5153 | + } | |
5154 | + } else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) { | |
5155 | + if (!reiserfs_data_writeback(s)) { | |
5156 | + switch_data_mode(s, REISERFS_DATA_WRITEBACK); | |
5157 | + printk("reiserfs: switching to writeback data mode\n"); | |
5158 | + } | |
5159 | + } | |
5160 | +} | |
5161 | + | |
5162 | static int reiserfs_remount (struct super_block * s, int * mount_flags, char * data) | |
5163 | { | |
5164 | struct reiserfs_super_block * rs; | |
5165 | @@ -723,9 +788,10 @@ | |
5166 | s->s_dirt = 0; | |
5167 | } else { | |
5168 | /* remount read-write */ | |
5169 | - if (!(s->s_flags & MS_RDONLY)) | |
5170 | + if (!(s->s_flags & MS_RDONLY)) { | |
5171 | + handle_data_mode(s, mount_options); | |
5172 | return 0; /* We are read-write already */ | |
5173 | - | |
5174 | + } | |
5175 | s->s_flags &= ~MS_RDONLY ; /* now it is safe to call journal_begin */ | |
5176 | journal_begin(&th, s, 10) ; | |
5177 | ||
5178 | @@ -743,9 +809,10 @@ | |
5179 | SB_JOURNAL(s)->j_must_wait = 1 ; | |
5180 | journal_end(&th, s, 10) ; | |
5181 | ||
5182 | - if (!( *mount_flags & MS_RDONLY ) ) | |
5183 | + if (!( *mount_flags & MS_RDONLY ) ) { | |
5184 | finish_unfinished( s ); | |
5185 | - | |
5186 | + handle_data_mode(s, mount_options); | |
5187 | + } | |
5188 | return 0; | |
5189 | } | |
5190 | ||
5191 | @@ -1172,9 +1239,6 @@ | |
5192 | ||
5193 | if (reiserfs_parse_options (s, (char *) data, &(s->u.reiserfs_sb.s_mount_opt), &blocks) == 0) { | |
5194 | return NULL; | |
5195 | - | |
5196 | - | |
5197 | - | |
5198 | } | |
5199 | ||
5200 | if (blocks) { | |
5201 | @@ -1222,9 +1286,22 @@ | |
5202 | printk("reiserfs:warning: - it is slow mode for debugging.\n"); | |
5203 | #endif | |
5204 | ||
5205 | - /* fixme */ | |
5206 | - jdev_name = NULL; | |
5207 | + /* make data=ordered the default */ | |
5208 | + if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && | |
5209 | + !reiserfs_data_writeback(s)) | |
5210 | + { | |
5211 | + s->u.reiserfs_sb.s_mount_opt |= (1 << REISERFS_DATA_ORDERED); | |
5212 | + } | |
5213 | + | |
5214 | + if (reiserfs_data_log(s)) { | |
5215 | + printk("reiserfs: using journaled data mode\n"); | |
5216 | + } else if (reiserfs_data_ordered(s)) { | |
5217 | + printk("reiserfs: using ordered data mode\n"); | |
5218 | + } else { | |
5219 | + printk("reiserfs: using writeback data mode\n"); | |
5220 | + } | |
5221 | ||
5222 | + jdev_name = NULL; | |
5223 | if( journal_init(s, jdev_name, old_format) ) { | |
5224 | reiserfs_warning(s, "sh-2022: reiserfs_read_super: unable to initialize journal space\n") ; | |
5225 | goto error ; | |
5226 | @@ -1364,16 +1441,19 @@ | |
5227 | ||
5228 | static int __init init_reiserfs_fs (void) | |
5229 | { | |
5230 | + int ret; | |
5231 | reiserfs_proc_info_global_init(); | |
5232 | reiserfs_proc_register_global( "version", | |
5233 | reiserfs_global_version_in_proc ); | |
5234 | + ret = reiserfs_journal_cache_init(); | |
5235 | + if (ret) | |
5236 | + return ret; | |
5237 | return register_filesystem(&reiserfs_fs_type); | |
5238 | } | |
5239 | ||
5240 | MODULE_DESCRIPTION("ReiserFS journaled filesystem"); | |
5241 | MODULE_AUTHOR("Hans Reiser <reiser@namesys.com>"); | |
5242 | MODULE_LICENSE("GPL"); | |
5243 | -EXPORT_NO_SYMBOLS; | |
5244 | ||
5245 | static void __exit exit_reiserfs_fs(void) | |
5246 | { | |
052932c9 AM |
5247 | diff -urN linux-2.4.22.org/fs/reiserfs/tail_conversion.c linux-2.4.22/fs/reiserfs/tail_conversion.c |
5248 | --- linux-2.4.22.org/fs/reiserfs/tail_conversion.c 2003-11-21 15:08:29.000000000 +0100 | |
5249 | +++ linux-2.4.22/fs/reiserfs/tail_conversion.c 2003-11-21 15:14:25.000000000 +0100 | |
e57e653a JR |
5250 | @@ -66,11 +66,11 @@ |
5251 | set_ih_free_space (&ind_ih, 0); /* delete at nearest future */ | |
5252 | put_ih_item_len( &ind_ih, UNFM_P_SIZE ); | |
5253 | PATH_LAST_POSITION (path)++; | |
5254 | - n_retval = reiserfs_insert_item (th, path, &end_key, &ind_ih, | |
5255 | + n_retval = reiserfs_insert_item (th, path, &end_key, &ind_ih, inode, | |
5256 | (char *)&unfm_ptr); | |
5257 | } else { | |
5258 | /* Paste into last indirect item of an object. */ | |
5259 | - n_retval = reiserfs_paste_into_item(th, path, &end_key, | |
5260 | + n_retval = reiserfs_paste_into_item(th, path, &end_key, inode, | |
5261 | (char *)&unfm_ptr, UNFM_P_SIZE); | |
5262 | } | |
5263 | if ( n_retval ) { | |
5264 | @@ -152,39 +152,6 @@ | |
5265 | } | |
5266 | } | |
5267 | ||
5268 | -static void | |
5269 | -unmap_buffers(struct page *page, loff_t pos) { | |
5270 | - struct buffer_head *bh ; | |
5271 | - struct buffer_head *head ; | |
5272 | - struct buffer_head *next ; | |
5273 | - unsigned long tail_index ; | |
5274 | - unsigned long cur_index ; | |
5275 | - | |
5276 | - if (page) { | |
5277 | - if (page->buffers) { | |
5278 | - tail_index = pos & (PAGE_CACHE_SIZE - 1) ; | |
5279 | - cur_index = 0 ; | |
5280 | - head = page->buffers ; | |
5281 | - bh = head ; | |
5282 | - do { | |
5283 | - next = bh->b_this_page ; | |
5284 | - | |
5285 | - /* we want to unmap the buffers that contain the tail, and | |
5286 | - ** all the buffers after it (since the tail must be at the | |
5287 | - ** end of the file). We don't want to unmap file data | |
5288 | - ** before the tail, since it might be dirty and waiting to | |
5289 | - ** reach disk | |
5290 | - */ | |
5291 | - cur_index += bh->b_size ; | |
5292 | - if (cur_index > tail_index) { | |
5293 | - reiserfs_unmap_buffer(bh) ; | |
5294 | - } | |
5295 | - bh = next ; | |
5296 | - } while (bh != head) ; | |
5297 | - } | |
5298 | - } | |
5299 | -} | |
5300 | - | |
5301 | /* this first locks inode (neither reads nor sync are permitted), | |
5302 | reads tail through page cache, insert direct item. When direct item | |
5303 | inserted successfully inode is left locked. Return value is always | |
5304 | @@ -261,7 +228,7 @@ | |
5305 | set_cpu_key_k_type (&key, TYPE_DIRECT); | |
5306 | key.key_length = 4; | |
5307 | /* Insert tail as new direct item in the tree */ | |
5308 | - if ( reiserfs_insert_item(th, p_s_path, &key, &s_ih, | |
5309 | + if ( reiserfs_insert_item(th, p_s_path, &key, &s_ih, p_s_inode, | |
5310 | tail ? tail : NULL) < 0 ) { | |
5311 | /* No disk memory. So we can not convert last unformatted node | |
5312 | to the direct item. In this case we used to adjust | |
5313 | @@ -274,10 +241,8 @@ | |
5314 | } | |
5315 | kunmap(page) ; | |
5316 | ||
5317 | - /* this will invalidate all the buffers in the page after | |
5318 | - ** pos1 | |
5319 | - */ | |
5320 | - unmap_buffers(page, pos1) ; | |
5321 | + /* make sure to get the i_blocks changes from reiserfs_insert_item */ | |
5322 | + reiserfs_update_sd(th, p_s_inode); | |
5323 | ||
5324 | // note: we have now the same as in above direct2indirect | |
5325 | // conversion: there are two keys which have matching first three | |
5326 | @@ -285,7 +250,6 @@ | |
5327 | ||
5328 | /* We have inserted new direct item and must remove last | |
5329 | unformatted node. */ | |
5330 | - p_s_inode->i_blocks += (p_s_sb->s_blocksize / 512); | |
5331 | *p_c_mode = M_CUT; | |
5332 | ||
5333 | /* we store position of first direct item in the in-core inode */ | |
052932c9 AM |
5334 | diff -urN linux-2.4.22.org/include/linux/fs.h linux-2.4.22/include/linux/fs.h |
5335 | --- linux-2.4.22.org/include/linux/fs.h 2003-11-21 15:08:34.000000000 +0100 | |
5336 | +++ linux-2.4.22/include/linux/fs.h 2003-11-21 15:14:25.000000000 +0100 | |
5337 | @@ -1222,6 +1222,8 @@ | |
e57e653a JR |
5338 | return test_and_set_bit(BH_Dirty, &bh->b_state); |
5339 | } | |
5340 | ||
5341 | +extern void buffer_insert_list_journal_head(struct buffer_head *bh, struct list_head *list, void *journal_head); | |
5342 | + | |
5343 | static inline void mark_buffer_async(struct buffer_head * bh, int on) | |
5344 | { | |
5345 | if (on) | |
052932c9 | 5346 | @@ -1508,6 +1510,7 @@ |
e57e653a JR |
5347 | /* Generic buffer handling for block filesystems.. */ |
5348 | extern int try_to_release_page(struct page * page, int gfp_mask); | |
5349 | extern int discard_bh_page(struct page *, unsigned long, int); | |
5350 | +extern void discard_buffer(struct buffer_head *bh) ; | |
5351 | #define block_flushpage(page, offset) discard_bh_page(page, offset, 1) | |
5352 | #define block_invalidate_page(page) discard_bh_page(page, 0, 0) | |
5353 | extern int block_symlink(struct inode *, const char *, int); | |
052932c9 AM |
5354 | diff -urN linux-2.4.22.org/include/linux/reiserfs_fs.h linux-2.4.22/include/linux/reiserfs_fs.h |
5355 | --- linux-2.4.22.org/include/linux/reiserfs_fs.h 2003-11-21 15:08:34.000000000 +0100 | |
5356 | +++ linux-2.4.22/include/linux/reiserfs_fs.h 2003-11-21 15:14:25.000000000 +0100 | |
e57e653a JR |
5357 | @@ -266,6 +266,7 @@ |
5358 | #define NO_DISK_SPACE -3 | |
5359 | #define NO_BALANCING_NEEDED (-4) | |
5360 | #define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5) | |
5361 | +#define QUOTA_EXCEEDED -6 | |
5362 | ||
5363 | typedef unsigned long b_blocknr_t; | |
5364 | typedef __u32 unp_t; | |
5365 | @@ -1329,8 +1330,7 @@ | |
5366 | #define fs_generation(s) ((s)->u.reiserfs_sb.s_generation_counter) | |
5367 | #define get_generation(s) atomic_read (&fs_generation(s)) | |
5368 | #define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) | |
5369 | -#define fs_changed(gen,s) (gen != get_generation (s)) | |
5370 | - | |
5371 | +#define fs_changed(gen,s) (gen != get_generation(s)) | |
5372 | ||
5373 | /***************************************************************************/ | |
5374 | /* FIXATE NODES */ | |
5375 | @@ -1653,6 +1653,86 @@ | |
5376 | /* 12 */ struct journal_params jh_journal; | |
5377 | } ; | |
5378 | ||
5379 | +static inline int | |
5380 | +reiserfs_file_data_log(struct inode *inode) { | |
5381 | + if (reiserfs_data_log(inode->i_sb) || | |
5382 | + (inode->u.reiserfs_i.i_flags & i_data_log)) | |
5383 | + { | |
5384 | + return 1 ; | |
5385 | + } | |
5386 | + return 0 ; | |
5387 | +} | |
5388 | + | |
5389 | +/* flags for the nested transaction handle */ | |
5390 | +#define REISERFS_PERSISTENT_HANDLE 1 | |
5391 | +#define REISERFS_ACTIVE_HANDLE 2 | |
5392 | +#define REISERFS_CLOSE_NESTED 4 | |
5393 | +#define REISERFS_DANGLING_HANDLE 8 | |
5394 | +/* | |
5395 | +** transaction handle which is passed around for all journal calls | |
5396 | +*/ | |
5397 | +struct reiserfs_transaction_handle { | |
5398 | + struct super_block *t_super ; /* super for this FS when journal_begin was | |
5399 | + called. saves calls to reiserfs_get_super | |
5400 | + also used by nested transactions to make | |
5401 | + sure they are nesting on the right FS | |
5402 | + _must_ be first in the handle | |
5403 | + */ | |
5404 | + int t_refcount; | |
5405 | + int t_blocks_logged ; /* number of blocks this writer has logged */ | |
5406 | + int t_blocks_allocated ; /* number of blocks this writer allocated */ | |
5407 | + unsigned long t_trans_id ; /* sanity check, equals the current trans id */ | |
5408 | + int t_flags ; | |
5409 | + void *t_handle_save ; /* save existing current->journal_info */ | |
5410 | + int displace_new_blocks:1; /* if new block allocation occurs, that | |
5411 | + block should be displaced from others */ | |
5412 | +} ; | |
5413 | + | |
5414 | +static inline int | |
5415 | +reiserfs_dangling_handle(struct reiserfs_transaction_handle *th) { | |
5416 | + return (th && (th->t_flags & REISERFS_DANGLING_HANDLE)) ; | |
5417 | +} | |
5418 | + | |
5419 | +static inline void | |
5420 | +reiserfs_set_handle_dangling(struct reiserfs_transaction_handle *th) { | |
5421 | + th->t_flags |= REISERFS_DANGLING_HANDLE ; | |
5422 | +} | |
5423 | + | |
5424 | +static inline void | |
5425 | +reiserfs_clear_handle_dangling(struct reiserfs_transaction_handle *th) { | |
5426 | + th->t_flags &= ~REISERFS_DANGLING_HANDLE ; | |
5427 | +} | |
5428 | + | |
5429 | +static inline int | |
5430 | +reiserfs_persistent_handle(struct reiserfs_transaction_handle *th) { | |
5431 | + return (th && (th->t_flags & REISERFS_PERSISTENT_HANDLE)) ; | |
5432 | +} | |
5433 | + | |
5434 | +static inline void | |
5435 | +reiserfs_set_handle_persistent(struct reiserfs_transaction_handle *th) { | |
5436 | + th->t_flags |= REISERFS_PERSISTENT_HANDLE ; | |
5437 | +} | |
5438 | + | |
5439 | +static inline int | |
5440 | +reiserfs_active_handle(struct reiserfs_transaction_handle *th) { | |
5441 | + return (th && (th->t_flags & REISERFS_ACTIVE_HANDLE)) ; | |
5442 | +} | |
5443 | + | |
5444 | +static inline void | |
5445 | +reiserfs_set_handle_active(struct reiserfs_transaction_handle *th) { | |
5446 | + th->t_flags |= REISERFS_ACTIVE_HANDLE ; | |
5447 | +} | |
5448 | + | |
5449 | +static inline int | |
5450 | +reiserfs_restartable_handle(struct reiserfs_transaction_handle *th) { | |
5451 | + return (th && (th->t_flags & REISERFS_CLOSE_NESTED)) ; | |
5452 | +} | |
5453 | + | |
5454 | +static inline void | |
5455 | +reiserfs_set_handle_restartable(struct reiserfs_transaction_handle *th) { | |
5456 | + th->t_flags |= REISERFS_CLOSE_NESTED ; | |
5457 | +} | |
5458 | + | |
5459 | extern task_queue reiserfs_commit_thread_tq ; | |
5460 | extern wait_queue_head_t reiserfs_commit_thread_wait ; | |
5461 | ||
5462 | @@ -1693,6 +1773,8 @@ | |
5463 | */ | |
5464 | #define JOURNAL_BUFFER(j,n) ((j)->j_ap_blocks[((j)->j_start + (n)) % JOURNAL_BLOCK_COUNT]) | |
5465 | ||
5466 | +int reiserfs_journal_cache_init(void); | |
5467 | +int reiserfs_flush_old_commits(struct super_block *); | |
5468 | void reiserfs_commit_for_inode(struct inode *) ; | |
5469 | void reiserfs_commit_for_tail(struct inode *) ; | |
5470 | void reiserfs_update_inode_transaction(struct inode *) ; | |
5471 | @@ -1701,6 +1783,18 @@ | |
5472 | void reiserfs_block_writes(struct reiserfs_transaction_handle *th) ; | |
5473 | void reiserfs_allow_writes(struct super_block *s) ; | |
5474 | void reiserfs_check_lock_depth(char *caller) ; | |
5475 | +int journal_mark_dirty(struct reiserfs_transaction_handle *, | |
5476 | + struct super_block *, struct buffer_head *bh) ; | |
5477 | + | |
5478 | +static inline int reiserfs_transaction_running(struct super_block *s) { | |
5479 | + struct reiserfs_transaction_handle *th = current->journal_info ; | |
5480 | + if (th && th->t_super == s) | |
5481 | + return 1 ; | |
5482 | + if (th && th->t_super == NULL) | |
5483 | + BUG(); | |
5484 | + return 0 ; | |
5485 | +} | |
5486 | + | |
5487 | void reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, int wait) ; | |
5488 | void reiserfs_restore_prepared_buffer(struct super_block *, struct buffer_head *bh) ; | |
5489 | struct buffer_head * journal_bread (struct super_block *s, int block); | |
5490 | @@ -1716,8 +1810,14 @@ | |
5491 | int push_journal_writer(char *w) ; | |
5492 | int pop_journal_writer(int windex) ; | |
5493 | int journal_transaction_should_end(struct reiserfs_transaction_handle *, int) ; | |
5494 | +int reiserfs_restart_transaction(struct reiserfs_transaction_handle *, int) ; | |
5495 | int reiserfs_in_journal(struct super_block *p_s_sb, kdev_t dev, int bmap_nr, int bit_nr, int size, int searchall, unsigned int *next) ; | |
5496 | int journal_begin(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ; | |
5497 | + | |
5498 | +/* allocates a transaction handle, and starts a new transaction it */ | |
5499 | +struct reiserfs_transaction_handle * | |
5500 | +reiserfs_persistent_transaction(struct super_block *p_s_sb, unsigned long) ; | |
5501 | + | |
5502 | struct super_block *reiserfs_get_super(kdev_t dev) ; | |
5503 | void flush_async_commits(struct super_block *p_s_sb) ; | |
5504 | ||
5505 | @@ -1833,11 +1933,13 @@ | |
5506 | int reiserfs_insert_item (struct reiserfs_transaction_handle *th, | |
5507 | struct path * path, | |
5508 | const struct cpu_key * key, | |
5509 | - struct item_head * ih, const char * body); | |
5510 | + struct item_head * ih, | |
5511 | + struct inode *inode, const char * body); | |
5512 | ||
5513 | int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, | |
5514 | struct path * path, | |
5515 | const struct cpu_key * key, | |
5516 | + struct inode *inode, | |
5517 | const char * body, int paste_size); | |
5518 | ||
5519 | int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, | |
5520 | @@ -1854,7 +1956,7 @@ | |
5521 | struct buffer_head * p_s_un_bh); | |
5522 | ||
5523 | void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th, | |
5524 | - struct key * key); | |
5525 | + struct inode *inode, struct key * key); | |
5526 | void reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * p_s_inode); | |
5527 | void reiserfs_do_truncate (struct reiserfs_transaction_handle *th, | |
5528 | struct inode * p_s_inode, struct page *, | |
5529 | @@ -1895,8 +1997,18 @@ | |
5530 | int i_size, | |
5531 | struct dentry *dentry, | |
5532 | struct inode *inode); | |
5533 | -int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode); | |
5534 | -void reiserfs_update_sd (struct reiserfs_transaction_handle *th, struct inode * inode); | |
5535 | + | |
5536 | +int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, | |
5537 | + struct inode * inode); | |
5538 | + | |
5539 | +void reiserfs_update_sd_size (struct reiserfs_transaction_handle *th, | |
5540 | + struct inode * inode, loff_t size); | |
5541 | + | |
5542 | +static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th, | |
5543 | + struct inode *inode) | |
5544 | +{ | |
5545 | + reiserfs_update_sd_size(th, inode, inode->i_size) ; | |
5546 | +} | |
5547 | ||
5548 | void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode ); | |
5549 | void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs ); | |
5550 | @@ -1981,7 +2093,7 @@ | |
5551 | extern struct inode_operations reiserfs_file_inode_operations; | |
5552 | extern struct file_operations reiserfs_file_operations; | |
5553 | extern struct address_space_operations reiserfs_address_space_operations ; | |
5554 | -int get_new_buffer (struct reiserfs_transaction_handle *th, struct buffer_head *, | |
5555 | +int get_new_buffer (struct reiserfs_transaction_handle *th, struct inode *, struct buffer_head *, | |
5556 | struct buffer_head **, struct path *); | |
5557 | ||
5558 | ||
5559 | @@ -2095,7 +2207,7 @@ | |
5560 | ||
5561 | int reiserfs_parse_alloc_options (struct super_block *, char *); | |
5562 | int is_reusable (struct super_block * s, unsigned long block, int bit_value); | |
5563 | -void reiserfs_free_block (struct reiserfs_transaction_handle *th, unsigned long); | |
5564 | +void reiserfs_free_block (struct reiserfs_transaction_handle *th, struct inode *inode, unsigned long, int); | |
5565 | int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t * , int, int); | |
5566 | extern inline int reiserfs_new_form_blocknrs (struct tree_balance * tb, | |
5567 | b_blocknr_t *new_blocknrs, int amount_needed) | |
052932c9 AM |
5568 | diff -urN linux-2.4.22.org/include/linux/reiserfs_fs_i.h linux-2.4.22/include/linux/reiserfs_fs_i.h |
5569 | --- linux-2.4.22.org/include/linux/reiserfs_fs_i.h 2003-11-21 15:08:34.000000000 +0100 | |
5570 | +++ linux-2.4.22/include/linux/reiserfs_fs_i.h 2003-11-21 15:14:25.000000000 +0100 | |
e57e653a JR |
5571 | @@ -6,6 +6,8 @@ |
5572 | ||
5573 | #include <linux/list.h> | |
5574 | ||
5575 | +struct reiserfs_journal_list; | |
5576 | + | |
5577 | /** bitmasks for i_flags field in reiserfs-specific part of inode */ | |
5578 | typedef enum { | |
5579 | /** this says what format of key do all items (but stat data) of | |
5580 | @@ -23,7 +25,9 @@ | |
5581 | truncate or unlink. Safe link is used to avoid leakage of disk | |
5582 | space on crash with some files open, but unlinked. */ | |
5583 | i_link_saved_unlink_mask = 0x0010, | |
5584 | - i_link_saved_truncate_mask = 0x0020 | |
5585 | + i_link_saved_truncate_mask = 0x0020, | |
5586 | + /** are we logging data blocks for this file? */ | |
5587 | + i_data_log = 0x0040, | |
5588 | } reiserfs_inode_flags; | |
5589 | ||
5590 | ||
5591 | @@ -52,14 +56,14 @@ | |
5592 | ** needs to be committed in order for this inode to be properly | |
5593 | ** flushed */ | |
5594 | unsigned long i_trans_id ; | |
5595 | - unsigned long i_trans_index ; | |
5596 | + struct reiserfs_journal_list *i_jl; | |
5597 | ||
5598 | /* direct io needs to make sure the tail is on disk to avoid | |
5599 | * buffer alias problems. This records the transaction last | |
5600 | * involved in a direct->indirect conversion for this file | |
5601 | */ | |
5602 | unsigned long i_tail_trans_id; | |
5603 | - unsigned long i_tail_trans_index; | |
5604 | + struct reiserfs_journal_list *i_tail_jl; | |
5605 | }; | |
5606 | ||
5607 | #endif | |
052932c9 AM |
5608 | diff -urN linux-2.4.22.org/include/linux/reiserfs_fs_sb.h linux-2.4.22/include/linux/reiserfs_fs_sb.h |
5609 | --- linux-2.4.22.org/include/linux/reiserfs_fs_sb.h 2003-11-21 15:08:34.000000000 +0100 | |
5610 | +++ linux-2.4.22/include/linux/reiserfs_fs_sb.h 2003-11-21 15:14:25.000000000 +0100 | |
e57e653a JR |
5611 | @@ -120,7 +120,6 @@ |
5612 | #define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ | |
5613 | #define JOURNAL_HASH_SIZE 8192 | |
5614 | #define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */ | |
5615 | -#define JOURNAL_LIST_COUNT 64 | |
5616 | ||
5617 | /* these are bh_state bit flag offset numbers, for use in the buffer head */ | |
5618 | ||
5619 | @@ -167,20 +166,27 @@ | |
5620 | struct reiserfs_bitmap_node **bitmaps ; | |
5621 | } ; | |
5622 | ||
5623 | -/* | |
5624 | -** transaction handle which is passed around for all journal calls | |
5625 | -*/ | |
5626 | -struct reiserfs_transaction_handle { | |
5627 | - /* ifdef it. -Hans */ | |
5628 | - char *t_caller ; /* debugging use */ | |
5629 | - int t_blocks_logged ; /* number of blocks this writer has logged */ | |
5630 | - int t_blocks_allocated ; /* number of blocks this writer allocated */ | |
5631 | - unsigned long t_trans_id ; /* sanity check, equals the current trans id */ | |
5632 | - struct super_block *t_super ; /* super for this FS when journal_begin was | |
5633 | - called. saves calls to reiserfs_get_super */ | |
5634 | - int displace_new_blocks:1; /* if new block allocation occurres, that block | |
5635 | - should be displaced from others */ | |
5636 | -} ; | |
5637 | +struct reiserfs_journal_list; | |
5638 | + | |
5639 | +/* so, we're using fsync_buffers_list to do the ordered buffer writes, | |
5640 | + * but we don't want to have a full inode on each buffer list, it is | |
5641 | + * a big waste of space. | |
5642 | + * | |
5643 | + * instead we copy the very head of the inode into a list here, a kludge | |
5644 | + * but much smaller. | |
5645 | + */ | |
5646 | +struct reiserfs_inode_list { | |
5647 | + struct list_head i_hash; | |
5648 | + struct list_head i_list; | |
5649 | + struct list_head i_dentry; | |
5650 | + struct list_head i_dirty_buffers; | |
5651 | + | |
5652 | + /* we could be very smart and do math based on the location | |
5653 | + * of the inode list in the journal list struct. | |
5654 | + * lets do that after this works properly | |
5655 | + */ | |
5656 | + struct reiserfs_journal_list *jl; | |
5657 | +}; | |
5658 | ||
5659 | /* | |
5660 | ** one of these for each transaction. The most important part here is the j_realblock. | |
5661 | @@ -190,20 +196,32 @@ | |
5662 | ** to be overwritten */ | |
5663 | struct reiserfs_journal_list { | |
5664 | unsigned long j_start ; | |
5665 | + unsigned long j_state ; | |
5666 | unsigned long j_len ; | |
5667 | atomic_t j_nonzerolen ; | |
5668 | atomic_t j_commit_left ; | |
5669 | - atomic_t j_flushing ; | |
5670 | - atomic_t j_commit_flushing ; | |
5671 | atomic_t j_older_commits_done ; /* all commits older than this on disk*/ | |
5672 | + struct semaphore j_commit_lock ; | |
5673 | unsigned long j_trans_id ; | |
5674 | time_t j_timestamp ; | |
5675 | struct reiserfs_list_bitmap *j_list_bitmap ; | |
5676 | struct buffer_head *j_commit_bh ; /* commit buffer head */ | |
5677 | struct reiserfs_journal_cnode *j_realblock ; | |
5678 | struct reiserfs_journal_cnode *j_freedlist ; /* list of buffers that were freed during this trans. free each of these on flush */ | |
5679 | - wait_queue_head_t j_commit_wait ; /* wait for all the commit blocks to be flushed */ | |
5680 | - wait_queue_head_t j_flush_wait ; /* wait for all the real blocks to be flushed */ | |
5681 | + | |
5682 | + /* time ordered list of all the active transactions */ | |
5683 | + struct list_head j_list; | |
5684 | + | |
5685 | + /* time ordered list of all transactions not touched by kreiserfsd */ | |
5686 | + struct list_head j_working_list; | |
5687 | + | |
5688 | + /* for data=ordered support */ | |
5689 | + struct list_head j_ordered_bh_list; | |
5690 | + | |
5691 | + /* sigh, the tails have slightly different rules for flushing, they | |
5692 | + * need their own list | |
5693 | + */ | |
5694 | + struct list_head j_tail_bh_list; | |
5695 | } ; | |
5696 | ||
5697 | struct reiserfs_page_list ; /* defined in reiserfs_fs.h */ | |
5698 | @@ -230,16 +248,11 @@ | |
5699 | unsigned long j_last_flush_trans_id ; /* last fully flushed journal timestamp */ | |
5700 | struct buffer_head *j_header_bh ; | |
5701 | ||
5702 | - /* j_flush_pages must be flushed before the current transaction can | |
5703 | - ** commit | |
5704 | - */ | |
5705 | - struct reiserfs_page_list *j_flush_pages ; | |
5706 | time_t j_trans_start_time ; /* time this transaction started */ | |
5707 | - wait_queue_head_t j_wait ; /* wait journal_end to finish I/O */ | |
5708 | - atomic_t j_wlock ; /* lock for j_wait */ | |
5709 | + struct semaphore j_lock ; | |
5710 | + struct semaphore j_flush_sem ; | |
5711 | wait_queue_head_t j_join_wait ; /* wait for current transaction to finish before starting new one */ | |
5712 | atomic_t j_jlock ; /* lock for j_join_wait */ | |
5713 | - int j_journal_list_index ; /* journal list number of the current trans */ | |
5714 | int j_list_bitmap_index ; /* number of next list bitmap to use */ | |
5715 | int j_must_wait ; /* no more journal begins allowed. MUST sleep on j_join_wait */ | |
5716 | int j_next_full_flush ; /* next journal_end will flush all journal list */ | |
5717 | @@ -255,13 +268,28 @@ | |
5718 | ||
5719 | struct reiserfs_journal_cnode *j_cnode_free_list ; | |
5720 | struct reiserfs_journal_cnode *j_cnode_free_orig ; /* orig pointer returned from vmalloc */ | |
5721 | + struct reiserfs_journal_list *j_current_jl; | |
5722 | ||
5723 | int j_free_bitmap_nodes ; | |
5724 | int j_used_bitmap_nodes ; | |
5725 | + int j_num_lists; /* total number of active transactions */ | |
5726 | + int j_num_work_lists; /* number that need attention from kreiserfsd */ | |
5727 | + | |
5728 | + /* debugging to make sure things are flushed in order */ | |
5729 | + int j_last_flush_id; | |
5730 | + | |
5731 | + /* debugging to make sure things are committed in order */ | |
5732 | + int j_last_commit_id; | |
5733 | + | |
5734 | struct list_head j_bitmap_nodes ; | |
5735 | - struct list_head j_dirty_buffers ; | |
5736 | + | |
5737 | + /* list of all active transactions */ | |
5738 | + struct list_head j_journal_list; | |
5739 | + | |
5740 | + /* lists that haven't been touched by kreiserfsd */ | |
5741 | + struct list_head j_working_list; | |
5742 | + | |
5743 | struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS] ; /* array of bitmaps to record the deleted blocks */ | |
5744 | - struct reiserfs_journal_list j_journal_list[JOURNAL_LIST_COUNT] ; /* array of all the journal lists */ | |
5745 | struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for real buffer heads in current trans */ | |
5746 | struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for all the real buffer heads in all | |
5747 | the transactions */ | |
5748 | @@ -413,6 +441,7 @@ | |
5749 | reiserfs_proc_info_data_t s_proc_info_data; | |
5750 | struct proc_dir_entry *procdir; | |
5751 | int reserved_blocks; /* amount of blocks reserved for further allocations */ | |
5752 | + struct list_head s_reiserfs_supers; | |
5753 | }; | |
5754 | ||
5755 | /* Definitions of reiserfs on-disk properties: */ | |
5756 | @@ -420,11 +449,12 @@ | |
5757 | #define REISERFS_3_6 1 | |
5758 | ||
5759 | /* Mount options */ | |
5760 | -#define REISERFS_LARGETAIL 0 /* large tails will be created in a session */ | |
5761 | -#define REISERFS_SMALLTAIL 17 /* small (for files less than block size) tails will be created in a session */ | |
5762 | -#define REPLAYONLY 3 /* replay journal and return 0. Use by fsck */ | |
5763 | -#define REISERFS_NOLOG 4 /* -o nolog: turn journalling off */ | |
5764 | -#define REISERFS_CONVERT 5 /* -o conv: causes conversion of old | |
5765 | +enum { | |
5766 | + REISERFS_LARGETAIL, /* large tails will be created in a session */ | |
5767 | + REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */ | |
5768 | + REPLAYONLY, /* replay journal and return 0. Use by fsck */ | |
5769 | + REISERFS_NOLOG, /* -o nolog: turn journalling off */ | |
5770 | + REISERFS_CONVERT, /* -o conv: causes conversion of old | |
5771 | format super block to the new | |
5772 | format. If not specified - old | |
5773 | partition will be dealt with in a | |
5774 | @@ -438,27 +468,25 @@ | |
5775 | ** the existing hash on the FS, so if you have a tea hash disk, and mount | |
5776 | ** with -o hash=rupasov, the mount will fail. | |
5777 | */ | |
5778 | -#define FORCE_TEA_HASH 6 /* try to force tea hash on mount */ | |
5779 | -#define FORCE_RUPASOV_HASH 7 /* try to force rupasov hash on mount */ | |
5780 | -#define FORCE_R5_HASH 8 /* try to force rupasov hash on mount */ | |
5781 | -#define FORCE_HASH_DETECT 9 /* try to detect hash function on mount */ | |
5782 | + FORCE_TEA_HASH, /* try to force tea hash on mount */ | |
5783 | + FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ | |
5784 | + FORCE_R5_HASH, /* try to force rupasov hash on mount */ | |
5785 | + FORCE_HASH_DETECT, /* try to detect hash function on mount */ | |
5786 | ||
5787 | ||
5788 | /* used for testing experimental features, makes benchmarking new | |
5789 | features with and without more convenient, should never be used by | |
5790 | users in any code shipped to users (ideally) */ | |
5791 | ||
5792 | -#define REISERFS_NO_BORDER 11 | |
5793 | -#define REISERFS_NO_UNHASHED_RELOCATION 12 | |
5794 | -#define REISERFS_HASHED_RELOCATION 13 | |
5795 | -#define REISERFS_TEST4 14 | |
5796 | - | |
5797 | -#define REISERFS_TEST1 11 | |
5798 | -#define REISERFS_TEST2 12 | |
5799 | -#define REISERFS_TEST3 13 | |
5800 | -#define REISERFS_TEST4 14 | |
5801 | - | |
5802 | -#define REISERFS_ATTRS (15) | |
5803 | + REISERFS_NO_BORDER, | |
5804 | + REISERFS_NO_UNHASHED_RELOCATION, | |
5805 | + REISERFS_HASHED_RELOCATION, | |
5806 | + REISERFS_DATA_LOG, | |
5807 | + REISERFS_DATA_ORDERED, | |
5808 | + REISERFS_DATA_WRITEBACK, | |
5809 | + REISERFS_ATTRS, | |
5810 | + REISERFS_TEST4, | |
5811 | +}; | |
5812 | ||
5813 | #define reiserfs_r5_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_R5_HASH)) | |
5814 | #define reiserfs_rupasov_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_RUPASOV_HASH)) | |
5815 | @@ -467,6 +495,9 @@ | |
5816 | #define reiserfs_no_border(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_BORDER)) | |
5817 | #define reiserfs_no_unhashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION)) | |
5818 | #define reiserfs_hashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_HASHED_RELOCATION)) | |
5819 | +#define reiserfs_data_log(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_DATA_LOG)) | |
5820 | +#define reiserfs_data_ordered(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_DATA_ORDERED)) | |
5821 | +#define reiserfs_data_writeback(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) | |
5822 | #define reiserfs_test4(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_TEST4)) | |
5823 | ||
5824 | #define have_large_tails(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_LARGETAIL)) | |
5825 | @@ -480,8 +511,6 @@ | |
5826 | ||
5827 | void reiserfs_file_buffer (struct buffer_head * bh, int list); | |
5828 | int reiserfs_is_super(struct super_block *s) ; | |
5829 | -int journal_mark_dirty(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ; | |
5830 | -int flush_old_commits(struct super_block *s, int) ; | |
5831 | int show_reiserfs_locks(void) ; | |
5832 | int reiserfs_resize(struct super_block *, unsigned long) ; | |
5833 | ||
5834 | @@ -492,8 +521,6 @@ | |
5835 | #define SB_BUFFER_WITH_SB(s) ((s)->u.reiserfs_sb.s_sbh) | |
5836 | #define SB_JOURNAL(s) ((s)->u.reiserfs_sb.s_journal) | |
5837 | #define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block) | |
5838 | -#define SB_JOURNAL_LIST(s) (SB_JOURNAL(s)->j_journal_list) | |
5839 | -#define SB_JOURNAL_LIST_INDEX(s) (SB_JOURNAL(s)->j_journal_list_index) | |
5840 | #define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) | |
5841 | #define SB_AP_BITMAP(s) ((s)->u.reiserfs_sb.s_ap_bitmap) | |
5842 | ||
052932c9 AM |
5843 | diff -urN linux-2.4.22.org/kernel/ksyms.c linux-2.4.22/kernel/ksyms.c |
5844 | --- linux-2.4.22.org/kernel/ksyms.c 2003-11-21 15:08:31.000000000 +0100 | |
5845 | +++ linux-2.4.22/kernel/ksyms.c 2003-11-21 15:15:21.000000000 +0100 | |
5846 | @@ -182,6 +182,7 @@ | |
e57e653a | 5847 | EXPORT_SYMBOL(end_buffer_io_async); |
052932c9 AM |
5848 | EXPORT_SYMBOL(__mark_buffer_dirty); |
5849 | EXPORT_SYMBOL(__mark_inode_dirty); | |
5850 | +EXPORT_SYMBOL(discard_buffer); /* for FS flushpage funcs */ | |
5851 | EXPORT_SYMBOL(fd_install); | |
5852 | EXPORT_SYMBOL(get_empty_filp); | |
5853 | EXPORT_SYMBOL(init_private_file); | |
5854 | diff -urN linux-2.4.22.org/mm/filemap.c linux-2.4.22/mm/filemap.c | |
5855 | --- linux-2.4.22.org/mm/filemap.c 2003-11-21 15:08:31.000000000 +0100 | |
5856 | +++ linux-2.4.22/mm/filemap.c 2003-11-21 15:14:25.000000000 +0100 | |
5857 | @@ -3041,6 +3041,14 @@ | |
5858 | } | |
e57e653a JR |
5859 | } |
5860 | ||
5861 | +static void update_inode_times(struct inode *inode) | |
5862 | +{ | |
5863 | + time_t now = CURRENT_TIME; | |
5864 | + if (inode->i_ctime != now || inode->i_mtime != now) { | |
5865 | + inode->i_ctime = inode->i_mtime = now; | |
5866 | + mark_inode_dirty_sync(inode); | |
5867 | + } | |
5868 | +} | |
5869 | /* | |
052932c9 AM |
5870 | * precheck_file_write(): |
5871 | * Check the conditions on a file descriptor prior to beginning a write | |
5872 | @@ -3302,8 +3310,7 @@ | |
e57e653a JR |
5873 | BUG(); |
5874 | ||
5875 | remove_suid(inode); | |
5876 | - inode->i_ctime = inode->i_mtime = CURRENT_TIME; | |
5877 | - mark_inode_dirty_sync(inode); | |
5878 | + update_inode_times(inode); | |
5879 | ||
5880 | written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos); | |
5881 | if (written > 0) { |