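linux-2.4.22-data-loging+quota.patch

ReiserFS data-logging and quota support for the Linux 2.4.22 kernel (PLD kernel
package; the repository has since marked this patch obsolete). In outline:

 * fs/buffer.c: adds buffer_insert_list_journal_head(), which queues a buffer on a
   journal-owned list while recording the owning journal head, and un-statics
   discard_buffer() for use by the journal code.
 * fs/inode.c: replaces unused_inodes_flush_task with a "kinoded" kernel thread so
   inode pruning and the follow-up sync run outside the caller's locking context.
 * fs/reiserfs/bitmap.c, fix_node.c, do_balan.c, ibalance.c: thread the owning inode
   and a for_unformatted flag through block allocation and freeing so quota is
   charged for data blocks only.
 * fs/reiserfs/file.c, inode.c: quota transfer on chown, O_SYNC journal commits,
   data=journal / data=ordered support via per-journal-list ordered and tail buffer
   lists, and byte-accurate space accounting (inode_set_bytes/inode_get_bytes).

The short notes between file sections below are explanatory and are skipped by
patch(1); only the diff lines are applied.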
diff -urN linux-2.4.22.org/fs/buffer.c linux-2.4.22/fs/buffer.c
--- linux-2.4.22.org/fs/buffer.c	2003-11-21 15:08:24.000000000 +0100
+++ linux-2.4.22/fs/buffer.c	2003-11-21 15:14:23.000000000 +0100
@@ -659,6 +659,20 @@
 	spin_unlock(&lru_list_lock);
 }
 
+void buffer_insert_list_journal_head(struct buffer_head *bh,
+				     struct list_head *list,
+				     void *journal_head)
+{
+	spin_lock(&lru_list_lock);
+	if (buffer_attached(bh))
+		list_del(&bh->b_inode_buffers);
+	set_buffer_attached(bh);
+	list_add(&bh->b_inode_buffers, list);
+	bh->b_journal_head = journal_head;
+	spin_unlock(&lru_list_lock);
+}
+EXPORT_SYMBOL(buffer_insert_list_journal_head);
+
 /*
  * The caller must have the lru_list lock before calling the
  * remove_inode_queue functions.
@@ -1370,7 +1384,7 @@
 /*
  * Called when truncating a buffer on a page completely.
  */
-static void discard_buffer(struct buffer_head * bh)
+void discard_buffer(struct buffer_head * bh)
 {
 	if (buffer_mapped(bh) || buffer_delay(bh)) {
 		mark_buffer_clean(bh);
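The buffer.c helper above is the hook the rest of the patch hangs ordered-data
writes on. A minimal caller sketch (illustrative only, not part of the patch; it
mirrors the add_to_flushlist() hunk in fs/reiserfs/inode.c further down):

	static void queue_ordered_data(struct inode *inode, struct buffer_head *bh)
	{
		/* pin bh on the current journal list and remember its owner */
		struct reiserfs_journal_list *jl = SB_JOURNAL(inode->i_sb)->j_current_jl;
		buffer_insert_list_journal_head(bh, &jl->j_ordered_bh_list, jl);
	}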
diff -urN linux-2.4.22.org/fs/inode.c linux-2.4.22/fs/inode.c
--- linux-2.4.22.org/fs/inode.c	2003-11-21 15:08:24.000000000 +0100
+++ linux-2.4.22/fs/inode.c	2003-11-21 15:14:23.000000000 +0100
@@ -476,7 +476,7 @@
 	}
 }
 
-static void try_to_sync_unused_inodes(void * arg)
+static void try_to_sync_unused_inodes(void)
 {
 	struct super_block * sb;
 	int nr_inodes = inodes_stat.nr_unused;
@@ -495,7 +495,8 @@
 	spin_unlock(&inode_lock);
 }
 
-static struct tq_struct unused_inodes_flush_task;
+static DECLARE_WAIT_QUEUE_HEAD(kinoded_wait) ;
+static atomic_t kinoded_goal = ATOMIC_INIT(0) ;
 
 /**
  *	write_inode_now	-	write an inode to disk
@@ -758,7 +759,7 @@
 		!inode_has_buffers(inode))
 #define INODE(entry)	(list_entry(entry, struct inode, i_list))
 
-void prune_icache(int goal)
+static void _prune_icache(int goal)
 {
 	LIST_HEAD(list);
 	struct list_head *entry, *freeable = &list;
@@ -792,35 +793,29 @@
 	spin_unlock(&inode_lock);
 
 	dispose_list(freeable);
+	kmem_cache_shrink(inode_cachep);
 
 	/*
-	 * If we didn't freed enough clean inodes schedule
-	 * a sync of the dirty inodes, we cannot do it
-	 * from here or we're either synchronously dogslow
-	 * or we deadlock with oom.
+	 * If we didn't free enough clean inodes
+	 * start a sync now
 	 */
 	if (goal)
-		schedule_task(&unused_inodes_flush_task);
+		try_to_sync_unused_inodes();
+}
+
+void prune_icache(int goal) {
+	atomic_add(goal, &kinoded_goal);
+	if (atomic_read(&kinoded_goal) > 16) {
+		wake_up_interruptible(&kinoded_wait);
+	}
 }
 
 int shrink_icache_memory(int priority, int gfp_mask)
 {
 	int count = 0;
-
-	/*
-	 * Nasty deadlock avoidance..
-	 *
-	 * We may hold various FS locks, and we don't
-	 * want to recurse into the FS that called us
-	 * in clear_inode() and friends..
-	 */
-	if (!(gfp_mask & __GFP_FS))
-		return 0;
-
 	count = inodes_stat.nr_unused / priority;
-
 	prune_icache(count);
-	return kmem_cache_shrink(inode_cachep);
+	return 0;
 }
 
 /*
@@ -1198,6 +1193,34 @@
 	return res;
 }
 
+int kinoded(void *startup) {
+
+	struct task_struct *tsk = current;
+	int goal ;
+
+	daemonize();
+	strcpy(tsk->comm, "kinoded");
+
+	/* avoid getting signals */
+	spin_lock_irq(&tsk->sigmask_lock);
+	flush_signals(tsk);
+	sigfillset(&tsk->blocked);
+	recalc_sigpending(tsk);
+	spin_unlock_irq(&tsk->sigmask_lock);
+
+	printk("kinoded started\n") ;
+	complete((struct completion *)startup);
+	while(1) {
+		wait_event_interruptible(kinoded_wait,
+					 atomic_read(&kinoded_goal));
+		while((goal = atomic_read(&kinoded_goal))) {
+			_prune_icache(goal);
+			atomic_sub(goal, &kinoded_goal);
+			cond_resched();
+		}
+	}
+}
+
 /*
  * Initialize the hash tables.
  */
@@ -1249,8 +1272,17 @@
 					 NULL);
 	if (!inode_cachep)
 		panic("cannot create inode slab cache");
+}
 
-	unused_inodes_flush_task.routine = try_to_sync_unused_inodes;
+/* we need to start a thread, and inode_init happens too early for that
+** to work.  So, add a second init func through module_init
+*/
+static int __init inode_mod_init(void)
+{
+	static struct completion startup __initdata = COMPLETION_INITIALIZER(startup);
+	kernel_thread(kinoded, &startup, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
+	wait_for_completion(&startup);
+	return 0;
 }
 
 /**
@@ -1344,3 +1376,5 @@
 }
 
 #endif
+
+module_init(inode_mod_init) ;
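With the hunks above, prune_icache() no longer prunes inline: callers only add to
kinoded_goal and wake kinoded once more than 16 inodes are pending, which is why
shrink_icache_memory() can drop its __GFP_FS deadlock-avoidance bail-out. The
contract between the two sides, restated from the diff (illustrative only):

	/* caller side (may hold FS locks): cheap, never recurses into the FS */
	atomic_add(goal, &kinoded_goal);
	if (atomic_read(&kinoded_goal) > 16)
		wake_up_interruptible(&kinoded_wait);

	/* kinoded side: drains the goal and syncs, in its own process context */
	while ((goal = atomic_read(&kinoded_goal))) {
		_prune_icache(goal);
		atomic_sub(goal, &kinoded_goal);
	}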
diff -urN linux-2.4.22.org/fs/reiserfs/bitmap.c linux-2.4.22/fs/reiserfs/bitmap.c
--- linux-2.4.22.org/fs/reiserfs/bitmap.c	2003-11-21 15:08:29.000000000 +0100
+++ linux-2.4.22/fs/reiserfs/bitmap.c	2003-11-21 15:14:23.000000000 +0100
@@ -10,6 +10,7 @@
 #include <linux/errno.h>
 #include <linux/locks.h>
 #include <linux/kernel.h>
+#include <linux/quotaops.h>
 
 #include <linux/reiserfs_fs.h>
 #include <linux/reiserfs_fs_sb.h>
@@ -287,7 +288,8 @@
 }
 
 static void _reiserfs_free_block (struct reiserfs_transaction_handle *th,
-				  b_blocknr_t block)
+				  struct inode *inode, b_blocknr_t block,
+				  int for_unformatted)
 {
     struct super_block * s = th->t_super;
     struct reiserfs_super_block * rs;
@@ -296,7 +298,6 @@
     int nr, offset;
 
     PROC_INFO_INC( s, free_block );
-
     rs = SB_DISK_SUPER_BLOCK (s);
     sbh = SB_BUFFER_WITH_SB (s);
     apbi = SB_AP_BITMAP(s);
@@ -309,7 +310,6 @@
 		block, bdevname(s->s_dev));
 	return;
     }
-
     reiserfs_prepare_for_journal(s, apbi[nr].bh, 1 ) ;
 
     /* clear bit for the given block in bit map */
@@ -329,39 +329,55 @@
     set_sb_free_blocks( rs, sb_free_blocks(rs) + 1 );
 
     journal_mark_dirty (th, s, sbh);
+    if (for_unformatted) {
+#ifdef REISERQUOTA_DEBUG
+	printk(KERN_DEBUG "reiserquota: freeing block id=%u\n", inode->i_uid);
+#endif
+	DQUOT_FREE_BLOCK_NODIRTY(inode, 1);
+    }
+
 }
 
 void reiserfs_free_block (struct reiserfs_transaction_handle *th,
-			  unsigned long block) {
+			  struct inode *inode, unsigned long block,
+			  int for_unformatted)
+{
     struct super_block * s = th->t_super;
 
     RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
     RFALSE(is_reusable (s, block, 1) == 0, "vs-4071: can not free such block");
     /* mark it before we clear it, just in case */
     journal_mark_freed(th, s, block) ;
-    _reiserfs_free_block(th, block) ;
+    _reiserfs_free_block(th, inode, block, for_unformatted) ;
 }
 
 /* preallocated blocks don't need to be run through journal_mark_freed */
 void reiserfs_free_prealloc_block (struct reiserfs_transaction_handle *th,
-			  unsigned long block) {
+				   struct inode *inode,
+				   unsigned long block)
+{
     RFALSE(!th->t_super, "vs-4060: trying to free block on nonexistent device");
     RFALSE(is_reusable (th->t_super, block, 1) == 0, "vs-4070: can not free such block");
-    _reiserfs_free_block(th, block) ;
+    _reiserfs_free_block(th, inode, block, 1) ;
 }
 
 static void __discard_prealloc (struct reiserfs_transaction_handle * th,
 				struct inode * inode)
 {
     unsigned long save = inode->u.reiserfs_i.i_prealloc_block ;
+    int dirty=0;
 #ifdef CONFIG_REISERFS_CHECK
     if (inode->u.reiserfs_i.i_prealloc_count < 0)
 	reiserfs_warning(th->t_super, "zam-4001:%s: inode has negative prealloc blocks count.\n", __FUNCTION__ );
 #endif
     while (inode->u.reiserfs_i.i_prealloc_count > 0) {
-	reiserfs_free_prealloc_block(th,inode->u.reiserfs_i.i_prealloc_block);
+	reiserfs_free_prealloc_block(th, inode, inode->u.reiserfs_i.i_prealloc_block);
 	inode->u.reiserfs_i.i_prealloc_block++;
 	inode->u.reiserfs_i.i_prealloc_count --;
+	dirty = 1 ;
+    }
+    if (dirty) {
+	reiserfs_update_sd(th, inode) ;
     }
     inode->u.reiserfs_i.i_prealloc_block = save ;
     list_del (&(inode->u.reiserfs_i.i_prealloc_list));
@@ -599,7 +615,6 @@
     if (hint->formatted_node || hint->inode == NULL) {
 	return 0;
     }
-
     hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
     border = hint->beg + (unsigned long) keyed_hash(((char *) (&hash_in)), 4) % (hint->end - hint->beg - 1);
     if (border > hint->search_start)
@@ -776,6 +791,24 @@
     int nr_allocated = 0;
 
     determine_prealloc_size(hint);
+    if (!hint->formatted_node) {
+	int quota_ret;
+#ifdef REISERQUOTA_DEBUG
+	printk(KERN_DEBUG "reiserquota: allocating %d blocks id=%u\n", amount_needed, hint->inode->i_uid);
+#endif
+	quota_ret = DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed);
+	if (quota_ret)	/* Quota exceeded? */
+	    return QUOTA_EXCEEDED;
+	if (hint->preallocate && hint->prealloc_size ) {
+#ifdef REISERQUOTA_DEBUG
+	    printk(KERN_DEBUG "reiserquota: allocating (prealloc) %d blocks id=%u\n", hint->prealloc_size, hint->inode->i_uid);
+#endif
+	    quota_ret = DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, hint->prealloc_size);
+	    if (quota_ret)
+		hint->preallocate=hint->prealloc_size=0;
+	}
+    }
+
     while((nr_allocated
 	   += allocate_without_wrapping_disk(hint, new_blocknrs + nr_allocated, start, finish,
 					     amount_needed - nr_allocated, hint->prealloc_size))
@@ -783,8 +816,14 @@
 
 	/* not all blocks were successfully allocated yet*/
 	if (second_pass) {	/* it was a second pass; we must free all blocks */
+	    if (!hint->formatted_node) {
+#ifdef REISERQUOTA_DEBUG
+		printk(KERN_DEBUG "reiserquota: freeing (nospace) %d blocks id=%u\n", amount_needed + hint->prealloc_size - nr_allocated, hint->inode->i_uid);
+#endif
+		DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */
+	    }
 	    while (nr_allocated --)
-		reiserfs_free_block(hint->th, new_blocknrs[nr_allocated]);
+		reiserfs_free_block(hint->th, hint->inode, new_blocknrs[nr_allocated], !hint->formatted_node);
 
 	    return NO_DISK_SPACE;
 	} else {	/* refine search parameters for next pass */
@@ -794,6 +833,13 @@
 	    continue;
 	}
     }
+    if ( !hint->formatted_node && amount_needed + hint->prealloc_size > nr_allocated + INODE_INFO(hint->inode)->i_prealloc_count) {
+	/* Some of preallocation blocks were not allocated */
+#ifdef REISERQUOTA_DEBUG
+	printk(KERN_DEBUG "reiserquota: freeing (failed prealloc) %d blocks id=%u\n", amount_needed + hint->prealloc_size - nr_allocated - INODE_INFO(hint->inode)->i_prealloc_count, hint->inode->i_uid);
+#endif
+	DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated - INODE_INFO(hint->inode)->i_prealloc_count);
+    }
     return CARRY_ON;
 }
 
@@ -862,7 +908,7 @@
 
     if (ret != CARRY_ON) {
 	while (amount_needed ++ < initial_amount_needed) {
-	    reiserfs_free_block(hint->th, *(--new_blocknrs));
+	    reiserfs_free_block(hint->th, hint->inode, *(--new_blocknrs), 1);
 	}
     }
     return ret;
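The allocator above charges quota only for unformatted (data) blocks; formatted
tree nodes are never charged, and every failure path returns exactly what was
charged. Reduced to its shape (illustrative sketch, not a verbatim excerpt):

	if (!hint->formatted_node &&
	    DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed))
		return QUOTA_EXCEEDED;	/* nothing allocated yet, nothing to undo */
	/* ... try to allocate; on a failed second pass roll the charge back ... */
	if (!hint->formatted_node)
		DQUOT_FREE_BLOCK_NODIRTY(hint->inode,
					 amount_needed + hint->prealloc_size - nr_allocated);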
diff -urN linux-2.4.22.org/fs/reiserfs/do_balan.c linux-2.4.22/fs/reiserfs/do_balan.c
--- linux-2.4.22.org/fs/reiserfs/do_balan.c	2003-11-21 15:08:29.000000000 +0100
+++ linux-2.4.22/fs/reiserfs/do_balan.c	2003-11-21 15:14:23.000000000 +0100
@@ -33,16 +33,8 @@
 inline void do_balance_mark_leaf_dirty (struct tree_balance * tb,
 					struct buffer_head * bh, int flag)
 {
-    if (reiserfs_dont_log(tb->tb_sb)) {
-	if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
-	    __mark_buffer_dirty(bh) ;
-	    tb->need_balance_dirty = 1;
-	}
-    } else {
-	int windex = push_journal_writer("do_balance") ;
-	journal_mark_dirty(tb->transaction_handle, tb->transaction_handle->t_super, bh) ;
-	pop_journal_writer(windex) ;
-    }
+    journal_mark_dirty(tb->transaction_handle,
+		       tb->transaction_handle->t_super, bh) ;
 }
 
 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
@@ -1247,7 +1239,7 @@
 	    if (buffer_dirty (tb->thrown[i]))
 		reiserfs_warning (tb->tb_sb, "free_thrown deals with dirty buffer %ld\n", blocknr);
 	    brelse(tb->thrown[i]) ; /* incremented in store_thrown */
-	    reiserfs_free_block (tb->transaction_handle, blocknr);
+	    reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0);
 	}
     }
 }
@@ -1259,9 +1251,11 @@
     set_blkh_level( blkh, FREE_LEVEL );
     set_blkh_nr_item( blkh, 0 );
 
-    mark_buffer_clean (bh);
+    if (buffer_dirty(bh))
+	BUG();
+    // mark_buffer_clean (bh);
     /* reiserfs_free_block is no longer schedule safe
-    reiserfs_free_block (tb->transaction_handle, tb->tb_sb, bh->b_blocknr);
+    reiserfs_free_block (tb->transaction_handle, NULL, tb->tb_sb, bh->b_blocknr, 0);
     */
 
     store_thrown (tb, bh);
@@ -1575,6 +1569,7 @@
     tb->tb_mode = flag;
     tb->need_balance_dirty = 0;
 
+    reiserfs_check_lock_depth("do balance");
     if (FILESYSTEM_CHANGED_TB(tb)) {
 	reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ;
     }
@@ -1605,5 +1600,6 @@
 
 
     do_balance_completed (tb);
+    reiserfs_check_lock_depth("do balance2");
 
 }
diff -urN linux-2.4.22.org/fs/reiserfs/file.c linux-2.4.22/fs/reiserfs/file.c
--- linux-2.4.22.org/fs/reiserfs/file.c	2003-11-21 15:08:29.000000000 +0100
+++ linux-2.4.22/fs/reiserfs/file.c	2003-11-21 15:14:23.000000000 +0100
@@ -6,6 +6,7 @@
 #include <linux/sched.h>
 #include <linux/reiserfs_fs.h>
 #include <linux/smp_lock.h>
+#include <linux/quotaops.h>
 
 /*
 ** We pack the tails of files on file close, not at the time they are written.
@@ -42,7 +43,6 @@
     lock_kernel() ;
     down (&inode->i_sem);
     journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3) ;
-    reiserfs_update_inode_transaction(inode) ;
 
 #ifdef REISERFS_PREALLOCATE
     reiserfs_discard_prealloc (&th, inode);
@@ -93,7 +93,9 @@
 static int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
     struct inode *inode = dentry->d_inode ;
     int error ;
-    if (attr->ia_valid & ATTR_SIZE) {
+    unsigned int ia_valid = attr->ia_valid ;
+
+    if (ia_valid & ATTR_SIZE) {
 	/* version 2 items will be caught by the s_maxbytes check
 	** done for us in vmtruncate
 	*/
@@ -101,8 +103,17 @@
 	    attr->ia_size > MAX_NON_LFS)
 	    return -EFBIG ;
 
+	/* During a truncate, we have to make sure the new i_size is in
+	** the transaction before we start dropping updates to data logged
+	** or ordered write data pages.
+	*/
+	if (attr->ia_size < inode->i_size && reiserfs_file_data_log(inode)) {
+	    struct reiserfs_transaction_handle th ;
+	    journal_begin(&th, inode->i_sb, 1) ;
+	    reiserfs_update_sd_size(&th, inode, attr->ia_size) ;
+	    journal_end(&th, inode->i_sb, 1) ;
 	/* fill in hole pointers in the expanding truncate case. */
-	if (attr->ia_size > inode->i_size) {
+	} else if (attr->ia_size > inode->i_size) {
 	    error = generic_cont_expand(inode, attr->ia_size) ;
 	    if (inode->u.reiserfs_i.i_prealloc_count > 0) {
 		struct reiserfs_transaction_handle th ;
@@ -123,15 +134,35 @@
 	return -EINVAL;
 
     error = inode_change_ok(inode, attr) ;
-    if (!error)
-        inode_setattr(inode, attr) ;
+    if (!error) {
+	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
+	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
+	    error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
 
+	if (!error)
+	    error = inode_setattr(inode, attr) ;
+    }
     return error ;
 }
 
+static ssize_t
+reiserfs_file_write(struct file *f, const char *b, size_t count, loff_t *ppos)
+{
+    ssize_t ret;
+    struct inode *inode = f->f_dentry->d_inode;
+
+    ret = generic_file_write(f, b, count, ppos);
+    if (ret >= 0 && f->f_flags & O_SYNC) {
+	lock_kernel();
+	reiserfs_commit_for_inode(inode);
+	unlock_kernel();
+    }
+    return ret;
+}
+
 struct file_operations reiserfs_file_operations = {
     read:	generic_file_read,
-    write:	generic_file_write,
+    write:	reiserfs_file_write,
     ioctl:	reiserfs_ioctl,
     mmap:	generic_file_mmap,
     release:	reiserfs_file_release,
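Besides the O_SYNC commit in reiserfs_file_write(), the setattr hunk enforces the
quota-transfer rule: when ownership changes, the inode's usage must move between
quota structures before the new attributes are committed, and a failed transfer
aborts the chown (sketch of the hunk above, illustrative only):

	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
		error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;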
diff -urN linux-2.4.22.org/fs/reiserfs/fix_node.c linux-2.4.22/fs/reiserfs/fix_node.c
--- linux-2.4.22.org/fs/reiserfs/fix_node.c	2003-11-21 15:08:29.000000000 +0100
+++ linux-2.4.22/fs/reiserfs/fix_node.c	2003-11-21 15:14:23.000000000 +0100
@@ -795,8 +795,9 @@
     else /* If we have enough already then there is nothing to do. */
 	return CARRY_ON;
 
-    if ( reiserfs_new_form_blocknrs (p_s_tb, a_n_blocknrs,
-				     n_amount_needed) == NO_DISK_SPACE )
+    /* No need to check quota - it is not allocated for blocks used for formatted nodes */
+    if (reiserfs_new_form_blocknrs (p_s_tb, a_n_blocknrs,
+				    n_amount_needed) == NO_DISK_SPACE)
 	return NO_DISK_SPACE;
 
     /* for each blocknumber we just got, get a buffer and stick it on FEB */
@@ -2121,7 +2122,8 @@
 
 static void clear_all_dirty_bits(struct super_block *s,
                                  struct buffer_head *bh) {
-  reiserfs_prepare_for_journal(s, bh, 0) ;
+  // reiserfs_prepare_for_journal(s, bh, 0) ;
+  set_bit(BH_JPrepared, &bh->b_state) ;
 }
 
 static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb)
@@ -2518,7 +2520,7 @@
 		/* de-allocated block which was not used by balancing and
 		   bforget about buffer for it */
 		brelse (tb->FEB[i]);
-		reiserfs_free_block (tb->transaction_handle, blocknr);
+		reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0);
 	    }
 	    if (tb->used[i]) {
 		/* release used as new nodes including a new root */
diff -urN linux-2.4.22.org/fs/reiserfs/ibalance.c linux-2.4.22/fs/reiserfs/ibalance.c
--- linux-2.4.22.org/fs/reiserfs/ibalance.c	2003-11-21 15:08:29.000000000 +0100
+++ linux-2.4.22/fs/reiserfs/ibalance.c	2003-11-21 15:14:23.000000000 +0100
@@ -632,7 +632,6 @@
     /* use check_internal if new root is an internal node */
     check_internal (new_root);
     /*&&&&&&&&&&&&&&&&&&&&&&*/
-    tb->tb_sb->s_dirt = 1;
 
     /* do what is needed for buffer thrown from tree */
     reiserfs_invalidate_buffer(tb, tbSh);
@@ -950,7 +949,6 @@
 	PUT_SB_ROOT_BLOCK( tb->tb_sb, tbSh->b_blocknr );
 	PUT_SB_TREE_HEIGHT( tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1 );
 	do_balance_mark_sb_dirty (tb, tb->tb_sb->u.reiserfs_sb.s_sbh, 1);
-	tb->tb_sb->s_dirt = 1;
     }
 
     if ( tb->blknum[h] == 2 ) {
diff -urN linux-2.4.22.org/fs/reiserfs/inode.c linux-2.4.22/fs/reiserfs/inode.c
--- linux-2.4.22.org/fs/reiserfs/inode.c	2003-11-21 15:08:29.000000000 +0100
+++ linux-2.4.22/fs/reiserfs/inode.c	2003-11-21 15:14:23.000000000 +0100
@@ -4,9 +4,11 @@
 
 #include <linux/config.h>
 #include <linux/sched.h>
+#include <linux/fs.h>
 #include <linux/reiserfs_fs.h>
 #include <linux/locks.h>
 #include <linux/smp_lock.h>
+#include <linux/quotaops.h>
 #include <asm/uaccess.h>
 #include <asm/unaligned.h>
 
@@ -17,6 +19,8 @@
 #define GET_BLOCK_READ_DIRECT 4  /* read the tail if indirect item not found */
 #define GET_BLOCK_NO_ISEM     8  /* i_sem is not held, don't preallocate */
 
+static int reiserfs_commit_write(struct file *, struct page *,
+				 unsigned from, unsigned to) ;
 static int reiserfs_get_block (struct inode * inode, long block,
 			       struct buffer_head * bh_result, int create);
 
@@ -33,6 +37,7 @@
 
     lock_kernel() ;
 
+    DQUOT_FREE_INODE(inode);
     /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
     if (INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
 	down (&inode->i_sem);
@@ -106,9 +111,13 @@
 }
 
 static void add_to_flushlist(struct inode *inode, struct buffer_head *bh) {
-    struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ;
+    struct reiserfs_journal_list *jl = SB_JOURNAL(inode->i_sb)->j_current_jl;
+    buffer_insert_list_journal_head(bh, &jl->j_ordered_bh_list, jl);
+}
 
-    buffer_insert_list(bh, &j->j_dirty_buffers) ;
+static void add_to_tail_list(struct inode *inode, struct buffer_head *bh) {
+    struct reiserfs_journal_list *jl = SB_JOURNAL(inode->i_sb)->j_current_jl;
+    buffer_insert_list_journal_head(bh, &jl->j_tail_bh_list, jl);
 }
 
 //
@@ -201,15 +210,16 @@
     return 0;
 }
 
-/*static*/ void restart_transaction(struct reiserfs_transaction_handle *th,
-				struct inode *inode, struct path *path) {
-  struct super_block *s = th->t_super ;
-  int len = th->t_blocks_allocated ;
-
+static void restart_transaction(struct reiserfs_transaction_handle *th,
+				struct inode *inode, struct path *path,
+				int jbegin_count) {
+  /* we cannot restart while nested unless the parent allows it */
+  if (!reiserfs_restartable_handle(th) && th->t_refcount > 1) {
+      return ;
+  }
   pathrelse(path) ;
   reiserfs_update_sd(th, inode) ;
-  journal_end(th, s, len) ;
-  journal_begin(th, s, len) ;
+  reiserfs_restart_transaction(th, jbegin_count) ;
   reiserfs_update_inode_transaction(inode) ;
 }
 
@@ -327,6 +337,10 @@
 	}
     }
     p += offset ;
+    if ((offset + inode->i_sb->s_blocksize) > PAGE_CACHE_SIZE) {
+printk("get_block_create_0 offset %lu too large\n", offset);
+    }
+
     memset (p, 0, inode->i_sb->s_blocksize);
     do {
 	if (!is_direct_le_ih (ih)) {
@@ -421,10 +435,32 @@
 static int reiserfs_get_block_direct_io (struct inode * inode, long block,
 			struct buffer_head * bh_result, int create) {
     int ret ;
-
+    struct reiserfs_transaction_handle *th;
+    int refcount = 0;
+    struct super_block *s = inode->i_sb;
+
+    /* get_block might start a new transaction and leave it running.
+     * test for that by checking for a transaction running right now
+     * and recording its refcount.  Run a journal_end if the refcount
+     * after reiserfs_get_block is higher than it was before.
+     */
+    if (reiserfs_transaction_running(s)) {
+	th = current->journal_info;
+	refcount = th->t_refcount;
+    }
     bh_result->b_page = NULL;
     ret = reiserfs_get_block(inode, block, bh_result, create) ;
 
+    if (!ret && reiserfs_transaction_running(s)) {
+	th = current->journal_info;
+	if (th->t_refcount > refcount) {
+	    lock_kernel();
+	    reiserfs_update_sd(th, inode) ;
+	    journal_end(th, s, th->t_blocks_allocated);
+	    unlock_kernel();
+	}
+    }
+
     /* don't allow direct io onto tail pages */
     if (ret == 0 && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
         /* make sure future calls to the direct io funcs for this offset
@@ -459,7 +495,6 @@
 				   struct buffer_head *bh_result,
 				   loff_t tail_offset) {
     unsigned long index ;
-    unsigned long tail_end ;
     unsigned long tail_start ;
     struct page * tail_page ;
     struct page * hole_page = bh_result->b_page ;
@@ -470,7 +505,6 @@
 
     /* always try to read until the end of the block */
     tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ;
-    tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ;
 
     index = tail_offset >> PAGE_CACHE_SHIFT ;
     if ( !hole_page || index != hole_page->index) {
@@ -492,16 +526,13 @@
     ** data that has been read directly into the page, and block_prepare_write
     ** won't trigger a get_block in this case.
     */
-    fix_tail_page_for_writing(tail_page) ;
-    retval = block_prepare_write(tail_page, tail_start, tail_end,
-                                 reiserfs_get_block) ;
+    retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_start) ;
     if (retval)
         goto unlock ;
 
     /* tail conversion might change the data in the page */
     flush_dcache_page(tail_page) ;
-
-    retval = generic_commit_write(NULL, tail_page, tail_start, tail_end) ;
+    retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_start) ;
 
 unlock:
     if (tail_page != hole_page) {
@@ -541,20 +572,34 @@
     int done;
     int fs_gen;
     int windex ;
-    struct reiserfs_transaction_handle th ;
+    struct reiserfs_transaction_handle *th = NULL ;
     /* space reserved in transaction batch:
        . 3 balancings in direct->indirect conversion
        . 1 block involved into reiserfs_update_sd()
+       . 1 bitmap block
        XXX in practically impossible worst case direct2indirect()
-       can incur (much) more that 3 balancings. */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1;
+       can incur (much) more than 3 balancings, but we deal with
+       direct2indirect lower down */
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT + 2;
     int version;
-    int transaction_started = 0 ;
+    int dangle = 1;
     loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
+    int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
 
-				/* bad.... */
+    /* if this block might contain a tail, we need to be more conservative */
+    if (new_offset <= (loff_t)(16 * 1024)) {
+	jbegin_count += JOURNAL_PER_BALANCE_CNT * 2;
+    }
+    /* we might nest for the entire page, so we need to make sure
+     * to reserve enough to insert pointers in the tree for each block
+     * in the file
+     */
+    jbegin_count *= blocks_per_page;
+    if (reiserfs_file_data_log(inode)) {
+	jbegin_count += blocks_per_page;
+
+    }
     lock_kernel() ;
-    th.t_trans_id = 0 ;
     version = get_inode_item_key_version (inode);
 
     if (block < 0) {
@@ -579,6 +624,10 @@
 	return ret;
     }
 
+    /* don't leave the trans running if we are already nested */
+    if (reiserfs_transaction_running(inode->i_sb))
+	dangle = 0;
+
     /* If file is of such a size, that it might have a tail and tails are enabled
     ** we should mark it as possibly needing tail packing on close
     */
@@ -591,10 +640,18 @@
     /* set the key of the first byte in the 'block'-th block of file */
     make_cpu_key (&key, inode, new_offset,
 		  TYPE_ANY, 3/*key length*/);
+
+    /* reiserfs_commit_write will close any transaction currently
+    ** running.  So, if we are nesting into someone else, we have to
+    ** make sure and bump the refcount
+    */
     if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
-	journal_begin(&th, inode->i_sb, jbegin_count) ;
+	th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count) ;
+	if (IS_ERR(th)) {
+	    retval = PTR_ERR(th) ;
+	    goto failure ;
+	}
 	reiserfs_update_inode_transaction(inode) ;
-	transaction_started = 1 ;
     }
  research:
 
@@ -614,28 +671,34 @@
 
     if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) {
 	/* we have to allocate block for the unformatted node */
-	if (!transaction_started) {
+	if (!reiserfs_active_handle(th)) {
 	    pathrelse(&path) ;
-	    journal_begin(&th, inode->i_sb, jbegin_count) ;
+	    th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count) ;
+	    if (IS_ERR(th)) {
+		retval = PTR_ERR(th) ;
+		goto failure ;
+	    }
 	    reiserfs_update_inode_transaction(inode) ;
-	    transaction_started = 1 ;
 	    goto research ;
 	}
 
-	repeat = _allocate_block(&th, block, inode, &allocated_block_nr, &path, create);
+	repeat = _allocate_block(th, block, inode, &allocated_block_nr, &path, create);
 
-	if (repeat == NO_DISK_SPACE) {
+	if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
 	    /* restart the transaction to give the journal a chance to free
 	    ** some blocks.  releases the path, so we have to go back to
 	    ** research if we succeed on the second try
 	    */
-	    restart_transaction(&th, inode, &path) ;
-	    repeat = _allocate_block(&th, block, inode, &allocated_block_nr, NULL, create);
+	    restart_transaction(th, inode, &path, jbegin_count) ;
+	    repeat = _allocate_block(th, block, inode, &allocated_block_nr, NULL, create);
 
-	    if (repeat != NO_DISK_SPACE) {
+	    if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
 		goto research ;
 	    }
-	    retval = -ENOSPC;
+	    if (repeat == QUOTA_EXCEEDED)
+		retval = -EDQUOT;
+	    else
+		retval = -ENOSPC;
 	    goto failure;
 	}
 
@@ -660,15 +723,12 @@
 	    bh_result->b_state |= (1UL << BH_New);
 	    put_block_num(item, pos_in_item, allocated_block_nr) ;
 	    unfm_ptr = allocated_block_nr;
-	    journal_mark_dirty (&th, inode->i_sb, bh);
-	    inode->i_blocks += (inode->i_sb->s_blocksize / 512) ;
-	    reiserfs_update_sd(&th, inode) ;
+	    journal_mark_dirty (th, inode->i_sb, bh);
+	    reiserfs_update_sd(th, inode) ;
 	}
 	set_block_dev_mapped(bh_result, unfm_ptr, inode);
 	pathrelse (&path);
 	pop_journal_writer(windex) ;
-	if (transaction_started)
-	    journal_end(&th, inode->i_sb, jbegin_count) ;
 
 	unlock_kernel() ;
 
@@ -676,18 +736,23 @@
 	** there is no need to make sure the inode is updated with this
 	** transaction
 	*/
+	if (!dangle && reiserfs_active_handle(th))
+	    journal_end(th, inode->i_sb, jbegin_count) ;
 	return 0;
     }
 
-    if (!transaction_started) {
+    if (!reiserfs_active_handle(th)) {
 	/* if we don't pathrelse, we could vs-3050 on the buffer if
 	** someone is waiting for it (they can't finish until the buffer
-	** is released, we can start a new transaction until they finish)
+	** is released, we can't start a new transaction until they finish)
 	*/
 	pathrelse(&path) ;
-	journal_begin(&th, inode->i_sb, jbegin_count) ;
+	th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count) ;
+	if (IS_ERR(th)) {
+	    retval = PTR_ERR(th) ;
+	    goto failure ;
+	}
 	reiserfs_update_inode_transaction(inode) ;
-	transaction_started = 1 ;
 	goto research;
     }
 
@@ -716,13 +781,11 @@
 	    set_cpu_key_k_offset (&tmp_key, 1);
 	    PATH_LAST_POSITION(&path) ++;
 
-	    retval = reiserfs_insert_item (&th, &path, &tmp_key, &tmp_ih, (char *)&unp);
+	    retval = reiserfs_insert_item (th, &path, &tmp_key, &tmp_ih, inode, (char *)&unp);
 	    if (retval) {
-		reiserfs_free_block (&th, allocated_block_nr);
-		goto failure; // retval == -ENOSPC or -EIO or -EEXIST
+		reiserfs_free_block (th, inode, allocated_block_nr, 1);
+		goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
 	    }
-	    if (unp)
-		inode->i_blocks += inode->i_sb->s_blocksize / 512;
 	    //mark_tail_converted (inode);
 	} else if (is_direct_le_ih (ih)) {
 	    /* direct item has to be converted */
@@ -742,8 +805,13 @@
 		   node. FIXME: this should also get into page cache */
 
 		pathrelse(&path) ;
-		journal_end(&th, inode->i_sb, jbegin_count) ;
-		transaction_started = 0 ;
+		/* ugly, but we should only end the transaction if
+		** we aren't nested
+		*/
+		if (th->t_refcount == 1) {
+		    journal_end(th, inode->i_sb, jbegin_count) ;
+		    th = NULL ;
+		}
 
 		retval = convert_tail_for_hole(inode, bh_result, tail_offset) ;
 		if (retval) {
@@ -751,20 +819,27 @@
 			reiserfs_warning(inode->i_sb, "clm-6004: convert tail failed inode %lu, error %d\n", inode->i_ino, retval) ;
 		    if (allocated_block_nr) {
 			/* the bitmap, the super, and the stat data == 3 */
-			journal_begin(&th, inode->i_sb, 3) ;
-			reiserfs_free_block (&th, allocated_block_nr);
-			transaction_started = 1 ;
+			if (!reiserfs_active_handle(th)) {
+			    th = reiserfs_persistent_transaction(inode->i_sb,3);
+			}
+			if (!IS_ERR(th)) {
+			    reiserfs_free_block(th,inode,allocated_block_nr,1);
+			}
+
 		    }
 		    goto failure ;
 		}
 		goto research ;
 	    }
-	    retval = direct2indirect (&th, inode, &path, unbh, tail_offset);
+	    retval = direct2indirect (th, inode, &path, unbh, tail_offset);
 	    if (retval) {
 		reiserfs_unmap_buffer(unbh);
-		reiserfs_free_block (&th, allocated_block_nr);
+		reiserfs_free_block (th, inode, allocated_block_nr, 1);
 		goto failure;
 	    }
+
+	    reiserfs_update_sd(th, inode) ;
+
 	    /* it is important the mark_buffer_uptodate is done after
 	    ** the direct2indirect.  The buffer might contain valid
 	    ** data newer than the data on disk (read by readpage, changed,
@@ -775,24 +850,25 @@
 	    */
 	    mark_buffer_uptodate (unbh, 1);
 
-	    /* unbh->b_page == NULL in case of DIRECT_IO request, this means
-	       buffer will disappear shortly, so it should not be added to
-	       any of our lists.
+	    /* we've converted the tail, so we must
+	    ** flush unbh before the transaction commits.
+	    ** unbh->b_page will be NULL for direct io requests, and
+	    ** in that case there's no data to log, dirty or order
 	    */
 	    if ( unbh->b_page ) {
-		/* we've converted the tail, so we must
-		** flush unbh before the transaction commits
-		*/
-		add_to_flushlist(inode, unbh) ;
-
-		/* mark it dirty now to prevent commit_write from adding
-		** this buffer to the inode's dirty buffer list
-		*/
-		__mark_buffer_dirty(unbh) ;
+		if (reiserfs_file_data_log(inode)) {
+		    reiserfs_prepare_for_journal(inode->i_sb, unbh, 1) ;
+		    journal_mark_dirty(th, inode->i_sb, unbh) ;
+		} else {
+		    /* mark it dirty now to prevent commit_write from adding
+		    ** this buffer to the inode's dirty buffer list
+		    */
+		    __mark_buffer_dirty(unbh) ;
+		    /* note, this covers the data=ordered case too */
+		    add_to_tail_list(inode, unbh) ;
+		}
 	    }
 
-	    //inode->i_blocks += inode->i_sb->s_blocksize / 512;
-	    //mark_tail_converted (inode);
 	} else {
 	    /* append indirect item with holes if needed, when appending
 	       pointer to 'block'-th block use block, which is already
@@ -840,18 +916,16 @@
 		   only have space for one block */
 		blocks_needed=max_to_insert?max_to_insert:1;
 	    }
-	    retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)un, UNFM_P_SIZE * blocks_needed);
+	    retval = reiserfs_paste_into_item (th, &path, &tmp_key, inode, (char *)un, UNFM_P_SIZE * blocks_needed);
 
 	    if (blocks_needed != 1)
 		kfree(un);
 
 	    if (retval) {
-		reiserfs_free_block (&th, allocated_block_nr);
+		reiserfs_free_block (th, inode, allocated_block_nr, 1);
 		goto failure;
 	    }
-	    if (done) {
-		inode->i_blocks += inode->i_sb->s_blocksize / 512;
-	    } else {
+	    if (!done) {
 		/* We need to mark new file size in case this function will be
 		   interrupted/aborted later on. And we may do this only for
 		   holes. */
@@ -870,9 +944,12 @@
 	    **
 	    ** release the path so that anybody waiting on the path before
 	    ** ending their transaction will be able to continue.
+	    **
+	    ** this only happens when inserting holes into the file, so it
+	    ** does not affect data=ordered safety at all
 	    */
-	    if (journal_transaction_should_end(&th, th.t_blocks_allocated)) {
-		restart_transaction(&th, inode, &path) ;
+	    if (journal_transaction_should_end(th, jbegin_count)) {
+		restart_transaction(th, inode, &path, jbegin_count) ;
 	    }
 	    /* inserting indirect pointers for a hole can take a
 	    ** long time.  reschedule if needed
@@ -890,7 +967,7 @@
 			      "%K should not be found\n", &key);
 	    retval = -EEXIST;
 	    if (allocated_block_nr)
-	        reiserfs_free_block (&th, allocated_block_nr);
+	        reiserfs_free_block (th, inode, allocated_block_nr, 1);
 	    pathrelse(&path) ;
 	    goto failure;
 	}
@@ -902,20 +979,82 @@
 
 
     retval = 0;
-    reiserfs_check_path(&path) ;
 
  failure:
-    if (transaction_started) {
-	reiserfs_update_sd(&th, inode) ;
-	journal_end(&th, inode->i_sb, jbegin_count) ;
+    pathrelse(&path) ;
+    /* if we had an error, end the transaction */
+    if (!IS_ERR(th) && reiserfs_active_handle(th)) {
+	if (retval != 0) {
+	    reiserfs_update_sd(th, inode) ;
+	    journal_end(th, inode->i_sb, jbegin_count) ;
+	    th = NULL ;
+	} else if (!dangle) {
+	    journal_end(th, inode->i_sb, jbegin_count) ;
+	    th = NULL ;
+	}
     }
     pop_journal_writer(windex) ;
+    if (retval == 0 && reiserfs_active_handle(th) &&
+        current->journal_info != th) {
+	BUG() ;
+    }
     unlock_kernel() ;
-    reiserfs_check_path(&path) ;
     return retval;
 }
 
 
+/* Compute real number of used bytes by file
+ * Following three functions can go away when we'll have enough space in stat item
+ */
+static int real_space_diff(struct inode *inode, int sd_size)
+{
+    int bytes;
+    loff_t blocksize = inode->i_sb->s_blocksize ;
+
+    if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
+        return sd_size ;
+
+    /* End of file is also in full block with indirect reference, so round
+    ** up to the next block.
+    **
+    ** there is just no way to know if the tail is actually packed
+    ** on the file, so we have to assume it isn't.  When we pack the
+    ** tail, we add 4 bytes to pretend there really is an unformatted
+    ** node pointer
+    */
+    bytes = ((inode->i_size + (blocksize-1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + sd_size;
+    return bytes ;
+}
+
+static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
+                                        int sd_size)
+{
+    if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
+        return inode->i_size + (loff_t)(real_space_diff(inode, sd_size)) ;
+    }
+    return ((loff_t)real_space_diff(inode, sd_size)) + (((loff_t)blocks) << 9);
+}
+
+/* Compute number of blocks used by file in ReiserFS counting */
+static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
+{
+    loff_t bytes = inode_get_bytes(inode) ;
+    loff_t real_space = real_space_diff(inode, sd_size) ;
+
+    /* keeps fsck and non-quota versions of reiserfs happy */
+    if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
+        bytes += (loff_t)511 ;
+    }
+
+    /* files from before the quota patch might have i_blocks such that
1088+ ** bytes < real_space. Deal with that here to prevent it from
1089+ ** going negative.
1090+ */
1091+ if (bytes < real_space)
1092+ return 0 ;
1093+ return (bytes - real_space) >> 9;
1094+}
1095+
1096 //
1097 // BAD: new directories have stat data of new type and all other items
1098 // of old type. Version stored in the inode says about body items, so
1099@@ -971,6 +1110,14 @@
1100
1101 rdev = sd_v1_rdev(sd);
1102 inode->u.reiserfs_i.i_first_direct_byte = sd_v1_first_direct_byte(sd);
1103+ /* an early bug in the quota code can give us an odd number for the
1104+ ** block count. This is incorrect, fix it here.
1105+ */
1106+ if (inode->i_blocks & 1) {
1107+ inode->i_blocks++ ;
1108+ }
1109+ inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks,
1110+ SD_V1_SIZE));
1111 /* nopack is initially zero for v1 objects. For v2 objects,
1112 nopack is initialised from sd_attrs */
1113 inode->u.reiserfs_i.i_flags &= ~i_nopack_mask;
1114@@ -1000,6 +1147,8 @@
1115 set_inode_item_key_version (inode, KEY_FORMAT_3_6);
1116
1117 set_inode_sd_version (inode, STAT_DATA_V2);
1118+ inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks,
1119+ SD_V2_SIZE));
1120 /* read persistent inode attributes from sd and initalise
1121 generic inode flags from them */
1122 inode -> u.reiserfs_i.i_attrs = sd_v2_attrs( sd );
1123@@ -1026,7 +1175,7 @@
1124
1125
1126 // update new stat data with inode fields
1127-static void inode2sd (void * sd, struct inode * inode)
1128+static void inode2sd (void * sd, struct inode * inode, loff_t new_size)
1129 {
1130 struct stat_data * sd_v2 = (struct stat_data *)sd;
1131 __u16 flags;
1132@@ -1034,12 +1183,12 @@
1133 set_sd_v2_mode(sd_v2, inode->i_mode );
1134 set_sd_v2_nlink(sd_v2, inode->i_nlink );
1135 set_sd_v2_uid(sd_v2, inode->i_uid );
1136- set_sd_v2_size(sd_v2, inode->i_size );
1137+ set_sd_v2_size(sd_v2, new_size);
1138 set_sd_v2_gid(sd_v2, inode->i_gid );
1139 set_sd_v2_mtime(sd_v2, inode->i_mtime );
1140 set_sd_v2_atime(sd_v2, inode->i_atime );
1141 set_sd_v2_ctime(sd_v2, inode->i_ctime );
1142- set_sd_v2_blocks(sd_v2, inode->i_blocks );
1143+ set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
1144 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1145 set_sd_v2_rdev(sd_v2, inode->i_rdev );
1146 else
1147@@ -1051,7 +1200,7 @@
1148
1149
1150 // used to copy inode's fields to old stat data
1151-static void inode2sd_v1 (void * sd, struct inode * inode)
1152+static void inode2sd_v1 (void * sd, struct inode * inode, loff_t new_size)
1153 {
1154 struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd;
1155
1156@@ -1059,7 +1208,7 @@
1157 set_sd_v1_uid(sd_v1, inode->i_uid );
1158 set_sd_v1_gid(sd_v1, inode->i_gid );
1159 set_sd_v1_nlink(sd_v1, inode->i_nlink );
1160- set_sd_v1_size(sd_v1, inode->i_size );
1161+ set_sd_v1_size(sd_v1, new_size);
1162 set_sd_v1_atime(sd_v1, inode->i_atime );
1163 set_sd_v1_ctime(sd_v1, inode->i_ctime );
1164 set_sd_v1_mtime(sd_v1, inode->i_mtime );
1165@@ -1067,7 +1216,7 @@
1166 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1167 set_sd_v1_rdev(sd_v1, inode->i_rdev );
1168 else
1169- set_sd_v1_blocks(sd_v1, inode->i_blocks );
1170+ set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));
1171
1172 // Sigh. i_first_direct_byte is back
1173 set_sd_v1_first_direct_byte(sd_v1, inode->u.reiserfs_i.i_first_direct_byte);
1174@@ -1077,7 +1226,8 @@
1175 /* NOTE, you must prepare the buffer head before sending it here,
1176 ** and then log it after the call
1177 */
1178-static void update_stat_data (struct path * path, struct inode * inode)
1179+static void update_stat_data (struct path * path, struct inode * inode,
1180+ loff_t new_size)
1181 {
1182 struct buffer_head * bh;
1183 struct item_head * ih;
1184@@ -1091,17 +1241,16 @@
1185
1186 if (stat_data_v1 (ih)) {
1187 // path points to old stat data
1188- inode2sd_v1 (B_I_PITEM (bh, ih), inode);
1189+ inode2sd_v1 (B_I_PITEM (bh, ih), inode, new_size);
1190 } else {
1191- inode2sd (B_I_PITEM (bh, ih), inode);
1192+ inode2sd (B_I_PITEM (bh, ih), inode, new_size);
1193 }
1194
1195 return;
1196 }
1197
1198-
1199-void reiserfs_update_sd (struct reiserfs_transaction_handle *th,
1200- struct inode * inode)
1201+void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
1202+ struct inode *inode, loff_t new_size)
1203 {
1204 struct cpu_key key;
1205 INITIALIZE_PATH(path);
1206@@ -1151,7 +1300,7 @@
1207 }
1208 break;
1209 }
1210- update_stat_data (&path, inode);
1211+ update_stat_data (&path, inode, new_size);
1212 journal_mark_dirty(th, th->t_super, bh) ;
1213 pathrelse (&path);
1214 return;
1215@@ -1236,6 +1385,7 @@
1216 reiserfs_make_bad_inode( inode );
1217 }
1218
1219+ reiserfs_update_inode_transaction(inode);
1220 reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */
1221
1222 }
1223@@ -1415,8 +1565,6 @@
1224 ** does something when called for a synchronous update.
1225 */
1226 void reiserfs_write_inode (struct inode * inode, int do_sync) {
1227- struct reiserfs_transaction_handle th ;
1228- int jbegin_count = 1 ;
1229
1230 if (inode->i_sb->s_flags & MS_RDONLY) {
1231 reiserfs_warning(inode->i_sb, "clm-6005: writing inode %lu on readonly FS\n",
1232@@ -1430,9 +1578,7 @@
1233 */
1234 if (do_sync && !(current->flags & PF_MEMALLOC)) {
1235 lock_kernel() ;
1236- journal_begin(&th, inode->i_sb, jbegin_count) ;
1237- reiserfs_update_sd (&th, inode);
1238- journal_end_sync(&th, inode->i_sb, jbegin_count) ;
1239+ reiserfs_commit_for_inode(inode) ;
1240 unlock_kernel() ;
1241 }
1242 }
1243@@ -1450,6 +1596,7 @@
1244 /* stat data of new object is inserted already, this inserts the item
1245 containing "." and ".." entries */
1246 static int reiserfs_new_directory (struct reiserfs_transaction_handle *th,
1247+ struct inode *inode,
1248 struct item_head * ih, struct path * path,
1249 const struct inode * dir)
1250 {
1251@@ -1494,13 +1641,14 @@
1252 }
1253
1254 /* insert item, that is empty directory item */
1255- return reiserfs_insert_item (th, path, &key, ih, body);
1256+ return reiserfs_insert_item (th, path, &key, ih, inode, body);
1257 }
1258
1259
1260 /* stat data of object has been inserted, this inserts the item
1261 containing the body of symlink */
1262 static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th,
1263+ struct inode *inode, /* Inode of symlink */
1264 struct item_head * ih,
1265 struct path * path, const char * symname, int item_len)
1266 {
1267@@ -1530,7 +1678,7 @@
1268 }
1269
1270 /* insert item, that is body of symlink */
1271- return reiserfs_insert_item (th, path, &key, ih, symname);
1272+ return reiserfs_insert_item (th, path, &key, ih, inode, symname);
1273 }
1274
1275
1276@@ -1604,7 +1752,8 @@
1277
1278 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1279 inode->i_size = i_size;
1280- inode->i_blocks = (inode->i_size + 511) >> 9;
1281+ inode->i_blocks = 0;
1282+ inode->i_bytes = 0;
1283 inode->u.reiserfs_i.i_first_direct_byte = S_ISLNK(mode) ? 1 :
1284 U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/;
1285
1286@@ -1638,9 +1787,9 @@
1287 err = -EINVAL;
1288 goto out_bad_inode;
1289 }
1290- inode2sd_v1 (&sd, inode);
1291+ inode2sd_v1 (&sd, inode, inode->i_size);
1292 } else
1293- inode2sd (&sd, inode);
1294+ inode2sd (&sd, inode, inode->i_size);
1295
1296 // these do not go to on-disk stat data
1297 inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid);
1298@@ -1665,7 +1814,7 @@
1299 if (dir->u.reiserfs_i.new_packing_locality)
1300 th->displace_new_blocks = 1;
1301 #endif
1302- retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd));
1303+ retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, inode, (char *)(&sd));
1304 if (retval) {
1305 reiserfs_check_path(&path_to_key) ;
1306 err = retval;
1307@@ -1678,14 +1827,14 @@
1308 #endif
1309 if (S_ISDIR(mode)) {
1310 /* insert item with "." and ".." */
1311- retval = reiserfs_new_directory (th, &ih, &path_to_key, dir);
1312+ retval = reiserfs_new_directory (th, inode, &ih, &path_to_key, dir);
1313 }
1314
1315 if (S_ISLNK(mode)) {
1316 /* insert body of symlink */
1317 if (!old_format_only (sb))
1318 i_size = ROUND_UP(i_size);
1319- retval = reiserfs_new_symlink (th, &ih, &path_to_key, symname, i_size);
1320+ retval = reiserfs_new_symlink (th, inode, &ih, &path_to_key, symname, i_size);
1321 }
1322 if (retval) {
1323 err = retval;
1324@@ -1705,6 +1854,9 @@
1325
1326 /* dquot_drop must be done outside a transaction */
1327 journal_end(th, th->t_super, th->t_blocks_allocated) ;
1328+ DQUOT_FREE_INODE(inode);
1329+ DQUOT_DROP(inode);
1330+ inode->i_flags |= S_NOQUOTA;
1331 make_bad_inode(inode);
1332
1333 out_inserted_sd:
1334@@ -1816,6 +1968,7 @@
1335 unsigned length ;
1336 struct page *page = NULL ;
1337 int error ;
1338+ int need_balance_dirty = 0 ;
1339 struct buffer_head *bh = NULL ;
1340
1341 if (p_s_inode->i_size > 0) {
1342@@ -1848,34 +2001,58 @@
1343 transaction of truncating gets committed - on reboot the file
1344 either appears truncated properly or not truncated at all */
1345 add_save_link (&th, p_s_inode, 1);
1346+ if (page)
1347+ kmap(page);
1348 reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
1349 pop_journal_writer(windex) ;
1350- journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;
1351-
1352- if (update_timestamps)
1353- remove_save_link (p_s_inode, 1/* truncate */);
1354
1355 if (page) {
1356+ if (!PageLocked(page))
1357+ BUG();
1358 length = offset & (blocksize - 1) ;
1359 /* if we are not on a block boundary */
1360 if (length) {
1361 length = blocksize - length ;
1362- memset((char *)kmap(page) + offset, 0, length) ;
1363+ if ((offset + length) > PAGE_CACHE_SIZE) {
1364+ BUG();
1365+ }
1366+ memset((char *)page_address(page) + offset, 0, length) ;
1367 flush_dcache_page(page) ;
1368- kunmap(page) ;
1369 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1370- if (!atomic_set_buffer_dirty(bh)) {
1371+ if (reiserfs_file_data_log(p_s_inode)) {
1372+ reiserfs_prepare_for_journal(p_s_inode->i_sb, bh, 1) ;
1373+ journal_mark_dirty(&th, p_s_inode->i_sb, bh) ;
1374+ } else {
1375+ /* it is safe to block here, but it would be faster
1376+ ** to balance dirty after the journal lock is dropped
1377+ */
1378+ if (!atomic_set_buffer_dirty(bh)) {
1379 set_buffer_flushtime(bh);
1380 refile_buffer(bh);
1381 buffer_insert_inode_data_queue(bh, p_s_inode);
1382- balance_dirty();
1383+ need_balance_dirty = 1;
1384+
1385+ if (reiserfs_data_ordered(p_s_inode->i_sb)) {
1386+ add_to_flushlist(p_s_inode, bh) ;
1387+ }
1388+ }
1389 }
1390 }
1391 }
1392+ kunmap(page);
1393+ }
1394+ journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1) ;
1395+
1396+ if (update_timestamps)
1397+ remove_save_link(p_s_inode, 1/* truncate */);
1398+
1399+ if (page) {
1400 UnlockPage(page) ;
1401 page_cache_release(page) ;
1402 }
1403-
1404+ if (need_balance_dirty) {
1405+ balance_dirty() ;
1406+ }
1407 return ;
1408 }
1409
1410@@ -1944,6 +2121,8 @@
1411 goto research;
1412 }
1413
1414+ if (((B_I_PITEM(bh, ih) - bh->b_data) + pos_in_item + copy_size) > inode->i_sb->s_blocksize)
1415+ BUG();
1416 memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ;
1417
1418 journal_mark_dirty(&th, inode->i_sb, bh) ;
1419@@ -1971,9 +2150,37 @@
1420
1421 /* this is where we fill in holes in the file. */
1422 if (use_get_block) {
1423+ int old_refcount = 0 ;
1424+ struct reiserfs_transaction_handle *hole_th ;
1425+ if (reiserfs_transaction_running(inode->i_sb)) {
1426+ hole_th = current->journal_info ;
1427+ old_refcount = hole_th->t_refcount ;
1428+ }
1429 retval = reiserfs_get_block(inode, block, bh_result,
1430 GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM) ;
1431 if (!retval) {
1432+ /* did reiserfs_get_block leave us a running transaction? */
1433+ if (reiserfs_transaction_running(inode->i_sb)) {
1434+ hole_th = current->journal_info ;
1435+ if (old_refcount < hole_th->t_refcount) {
1436+ lock_kernel() ;
1437+ /* we've filled a hole, make sure the new block
1438+ * gets to disk before transaction commit
1439+ */
1440+ if (buffer_mapped(bh_result) && bh_result->b_blocknr != 0 &&
1441+ reiserfs_data_ordered(inode->i_sb))
1442+ {
1443+ __mark_buffer_dirty(bh_result) ;
1444+ mark_buffer_uptodate(bh_result, 1);
1445+ /* no need to update the inode trans, already done */
1446+ add_to_flushlist(inode, bh_result) ;
1447+ }
1448+ reiserfs_update_sd(hole_th, inode) ;
1449+ journal_end(hole_th, hole_th->t_super,
1450+ hole_th->t_blocks_allocated) ;
1451+ unlock_kernel() ;
1452+ }
1453+ }
1454 if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) {
1455 /* get_block failed to find a mapped unformatted node. */
1456 use_get_block = 0 ;
1457@@ -1988,33 +2195,41 @@
1458 /* helper func to get a buffer head ready for writepage to send to
1459 ** ll_rw_block
1460 */
1461-static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) {
1462+static void submit_bh_for_writepage(struct page *page,
1463+ struct buffer_head **bhp, int nr) {
1464 struct buffer_head *bh ;
1465 int i;
1466
1467- /* lock them all first so the end_io handler doesn't unlock the page
1468- ** too early
1469+ /* lock them all first so the end_io handler doesn't
1470+ ** unlock too early
1471+ **
1472+ ** There's just no safe way to log the buffers during writepage,
1473+ ** we'll deadlock if kswapd tries to start a transaction.
1474+ **
1475+ ** There's also no useful way to tie them to a specific transaction,
1476+ ** so we just don't bother.
1477 */
1478 for(i = 0 ; i < nr ; i++) {
1479- bh = bhp[i] ;
1480- lock_buffer(bh) ;
1481- set_buffer_async_io(bh) ;
1482+ bh = bhp[i] ;
1483+ lock_buffer(bh);
1484+ set_buffer_async_io(bh);
1485+ set_bit(BH_Uptodate, &bh->b_state) ;
1486 }
1487 for(i = 0 ; i < nr ; i++) {
1488+ bh = bhp[i] ;
1489 /* submit_bh doesn't care if the buffer is dirty, but nobody
1490 ** later on in the call chain will be cleaning it. So, we
1491 ** clean the buffer here, it still gets written either way.
1492 */
1493- bh = bhp[i] ;
1494 clear_bit(BH_Dirty, &bh->b_state) ;
1495- set_bit(BH_Uptodate, &bh->b_state) ;
1496 submit_bh(WRITE, bh) ;
1497 }
1498 }
1499
1500 static int reiserfs_write_full_page(struct page *page) {
1501 struct inode *inode = page->mapping->host ;
1502- unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ;
1503+ loff_t size = inode->i_size;
1504+ unsigned long end_index = size >> PAGE_CACHE_SHIFT ;
1505 unsigned last_offset = PAGE_CACHE_SIZE;
1506 int error = 0;
1507 unsigned long block ;
1508@@ -2024,21 +2239,36 @@
1509 struct buffer_head *arr[PAGE_CACHE_SIZE/512] ;
1510 int nr = 0 ;
1511
1512+ if (reiserfs_transaction_running(inode->i_sb)) {
1513+ BUG();
1514+ }
1515+
1516+ if (!PageLocked(page))
1517+ BUG();
1518+
1519 if (!page->buffers) {
1520 block_prepare_write(page, 0, 0, NULL) ;
1521 kunmap(page) ;
1522 }
1523+
1524+ if (reiserfs_transaction_running(inode->i_sb)) {
1525+ BUG();
1526+ }
1527 /* last page in the file, zero out any contents past the
1528 ** last byte in the file
1529 */
1530 if (page->index >= end_index) {
1531- last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ;
1532+ char *p ;
1533+ last_offset = size & (PAGE_CACHE_SIZE - 1) ;
1534 /* no file contents in this page */
1535 if (page->index >= end_index + 1 || !last_offset) {
1536 error = -EIO ;
1537 goto fail ;
1538 }
1539- memset((char *)kmap(page)+last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
1540+ p = kmap(page);
1541+ if (last_offset > PAGE_CACHE_SIZE)
1542+ BUG();
1543+ memset(p + last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
1544 flush_dcache_page(page) ;
1545 kunmap(page) ;
1546 }
1547@@ -2079,7 +2309,7 @@
1548 ** nr == 0 without there being any kind of error.
1549 */
1550 if (nr) {
1551- submit_bh_for_writepage(arr, nr) ;
1552+ submit_bh_for_writepage(page, arr, nr) ;
1553 wakeup_page_waiters(page);
1554 } else {
1555 UnlockPage(page) ;
1556@@ -2091,7 +2321,7 @@
1557
1558 fail:
1559 if (nr) {
1560- submit_bh_for_writepage(arr, nr) ;
1561+ submit_bh_for_writepage(page, arr, nr) ;
1562 } else {
1563 UnlockPage(page) ;
1564 }
1565@@ -2116,10 +2346,46 @@
1566
1567 int reiserfs_prepare_write(struct file *f, struct page *page,
1568 unsigned from, unsigned to) {
1569+ int cur_refcount = 0 ;
1570+ int ret ;
1571 struct inode *inode = page->mapping->host ;
1572+ struct reiserfs_transaction_handle *th ;
1573+
1574 reiserfs_wait_on_write_block(inode->i_sb) ;
1575 fix_tail_page_for_writing(page) ;
1576- return block_prepare_write(page, from, to, reiserfs_get_block) ;
1577+
1578+ /* we look for a running transaction before the block_prepare_write
1579+ ** call, and then again afterwards. This lets us know if
1580+ ** reiserfs_get_block added any additional transactions, so we can
1581+ ** let reiserfs_commit_write know if he needs to close them.
1582+ ** this is just nasty
1583+ */
1584+ if (reiserfs_transaction_running(inode->i_sb)) {
1585+ th = current->journal_info ;
1586+ cur_refcount = th->t_refcount ;
1587+ }
1588+ ret = block_prepare_write(page, from, to, reiserfs_get_block) ;
1589+
1590+ /* it is very important that we only set the dangling bit when
1591+ ** there is no chance of additional nested transactions.
1592+ */
1593+ if (reiserfs_transaction_running(inode->i_sb)) {
1594+ th = current->journal_info ;
1595+ if (th->t_refcount > cur_refcount) {
1596+ /* if we return an error, commit_write isn't going to get called
1597+ * we need to make sure we end any transactions
1598+ * reiserfs_get_block left hanging around
1599+ */
1600+ if (ret) {
1601+ lock_kernel();
1602+ journal_end(th, th->t_super, th->t_blocks_allocated) ;
1603+ unlock_kernel();
1604+ } else {
1605+ reiserfs_set_handle_dangling(th) ;
1606+ }
1607+ }
1608+ }
1609+ return ret ;
1610 }
1611
1612
1613@@ -2127,20 +2393,96 @@
1614 return generic_block_bmap(as, block, reiserfs_bmap) ;
1615 }
1616
1617+/* taken from fs/buffer.c */
1618+static int __commit_write(struct reiserfs_transaction_handle *th,
1619+ struct inode *inode, struct page *page,
1620+ unsigned from, unsigned to, int *balance)
1621+{
1622+ unsigned block_start, block_end;
1623+ int partial = 0;
1624+ unsigned blocksize;
1625+ struct buffer_head *bh, *head;
1626+ int logbh = 0 ;
1627+
1628+ blocksize = 1 << inode->i_blkbits;
1629+ if (reiserfs_file_data_log(inode)) {
1630+ logbh = 1 ;
1631+ lock_kernel() ;
1632+ /* one for each block + the stat data, the caller closes the handle */
1633+ journal_begin(th, inode->i_sb,(PAGE_CACHE_SIZE >> inode->i_blkbits)+1);
1634+ reiserfs_update_inode_transaction(inode) ;
1635+ unlock_kernel() ;
1636+ }
1637+
1638+ for(bh = head = page->buffers, block_start = 0;
1639+ bh != head || !block_start;
1640+ block_start=block_end, bh = bh->b_this_page) {
1641+ block_end = block_start + blocksize;
1642+ if (block_end <= from || block_start >= to) {
1643+ if (!buffer_uptodate(bh))
1644+ partial = 1;
1645+ } else {
1646+ set_bit(BH_Uptodate, &bh->b_state);
1647+ if (logbh) {
1648+ lock_kernel() ;
1649+ reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
1650+ journal_mark_dirty (th, inode->i_sb, bh);
1651+ unlock_kernel() ;
1652+ } else if (!atomic_set_buffer_dirty(bh)) {
1653+ __mark_dirty(bh);
1654+ if (reiserfs_data_ordered(inode->i_sb)) {
1655+ lock_kernel();
1656+ add_to_flushlist(inode, bh);
1657+ /* if we don't update the inode trans information,
1658+ * an fsync(fd) might not catch these data blocks
1659+ */
1660+ reiserfs_update_inode_transaction(inode);
1661+ unlock_kernel();
1662+ } else {
1663+ buffer_insert_inode_data_queue(bh, inode);
1664+ }
1665+ *balance = 1;
1666+ }
1667+ }
1668+ }
1669+
1670+ /*
1671+ * if this is a partial write that happened to make all buffers
1672+ * uptodate then we can optimize away a bogus readpage() for
1673+ * the next read(). Here we 'discover' whether the page went
1674+ * uptodate as a result of this (potentially partial) write.
1675+ */
1676+ if (!partial)
1677+ SetPageUptodate(page);
1678+ return 0;
1679+}
1680+
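The loop in __commit_write above walks the page's circular ring of buffer heads (linked through b_this_page) and only dirties buffers overlapping [from, to). A user-space sketch of the same walk, assuming a 4K page split into four 1K buffers; the types are made up:

    #include <stdio.h>

    struct buf { unsigned start, end; int dirty; struct buf *next; };

    static void commit_range(struct buf *head, unsigned from, unsigned to)
    {
        struct buf *bh = head;
        do {
            if (bh->end > from && bh->start < to)
                bh->dirty = 1;          /* overlaps the written range */
            bh = bh->next;
        } while (bh != head);
    }

    int main(void)
    {
        struct buf b[4];
        int i;
        for (i = 0; i < 4; i++) {
            b[i].start = i * 1024;
            b[i].end = b[i].start + 1024;
            b[i].dirty = 0;
            b[i].next = &b[(i + 1) % 4];    /* circular, like b_this_page */
        }
        commit_range(&b[0], 1500, 2600);    /* touches buffers 1 and 2 */
        for (i = 0; i < 4; i++)
            printf("buf %d dirty=%d\n", i, b[i].dirty);
        return 0;
    }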
1681 static int reiserfs_commit_write(struct file *f, struct page *page,
1682 unsigned from, unsigned to) {
1683 struct inode *inode = page->mapping->host ;
1684 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1685 int ret ;
1686-
1687+ int need_balance = 0;
1688+ struct reiserfs_transaction_handle th ;
1689+ struct reiserfs_transaction_handle *dth = NULL ;
1690+
1691+ /* we must do this before anything that might nest a transaction or
1692+ ** mess with the handle flags
1693+ */
1694+ if (reiserfs_transaction_running(inode->i_sb)) {
1695+ dth = current->journal_info ;
1696+ if (reiserfs_dangling_handle(dth)) {
1697+ reiserfs_clear_handle_dangling(dth) ;
1698+ } else {
1699+ dth = NULL ;
1700+ }
1701+ }
1702 reiserfs_wait_on_write_block(inode->i_sb) ;
1703+
1704+ th.t_flags = 0 ;
1705+ ret = __commit_write(&th, inode, page, from, to, &need_balance) ;
1706
1707- /* generic_commit_write does this for us, but does not update the
1708- ** transaction tracking stuff when the size changes. So, we have
1709- ** to do the i_size updates here.
1710- */
1711 if (pos > inode->i_size) {
1712- struct reiserfs_transaction_handle th ;
1713 lock_kernel();
1714 /* If the file have grown beyond the border where it
1715 can have a tail, unmark it as needing a tail
1716@@ -2149,24 +2491,135 @@
1717 (have_small_tails (inode->i_sb) && inode->i_size > block_size(inode)) )
1718 inode->u.reiserfs_i.i_flags &= ~i_pack_on_close_mask;
1719
1720- journal_begin(&th, inode->i_sb, 1) ;
1721+ if (!reiserfs_active_handle(&th)) {
1722+ journal_begin(&th, inode->i_sb, 1) ;
1723+ }
1724 reiserfs_update_inode_transaction(inode) ;
1725 inode->i_size = pos ;
1726 reiserfs_update_sd(&th, inode) ;
1727- journal_end(&th, inode->i_sb, 1) ;
1728- unlock_kernel();
1729+ journal_end(&th, th.t_super, th.t_blocks_allocated) ;
1730+ unlock_kernel() ;
1731+ } else if (reiserfs_active_handle(&th)) {
1732+ /* in case commit_write left one running and the i_size update did
1733+ ** not close it
1734+ */
1735+ lock_kernel() ;
1736+ journal_end(&th, th.t_super, th.t_blocks_allocated) ;
1737+ unlock_kernel() ;
1738 }
1739-
1740- ret = generic_commit_write(f, page, from, to) ;
1741
1742- /* we test for O_SYNC here so we can commit the transaction
1743- ** for any packed tails the file might have had
1744+ /* did reiserfs_get_block leave us with a running transaction?
1745 */
1746- if (f && (f->f_flags & O_SYNC)) {
1747+ if (dth) {
1748 lock_kernel() ;
1749- reiserfs_commit_for_inode(inode) ;
1750+ journal_end(dth, dth->t_super, dth->t_blocks_allocated) ;
1751 unlock_kernel();
1752 }
1753+
1754+ kunmap(page) ;
1755+
1756+ if (need_balance)
1757+ balance_dirty();
1758+
1759+ return ret ;
1760+}
1761+
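The i_size update above hinges on pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to, the absolute file offset one byte past the end of this write. A quick standalone check of that arithmetic, assuming 4K pages:

    #include <stdio.h>

    int main(void)
    {
        const int PAGE_CACHE_SHIFT = 12;    /* 4096-byte pages assumed */
        unsigned long index = 3, to = 513;
        long long pos = ((long long)index << PAGE_CACHE_SHIFT) + to;
        printf("write ends at byte %lld\n", pos);  /* 12288 + 513 = 12801 */
        return 0;
    }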
1762+/* decide if this buffer needs to stay around for data logging or ordered
1763+** write purposes
1764+*/
1765+static int flushpage_can_drop(struct inode *inode, struct buffer_head *bh) {
1766+ int ret = 1 ;
1767+
1768+ if (!buffer_mapped(bh)) {
1769+ return 1 ;
1770+ }
1771+ if (reiserfs_file_data_log(inode)) {
1772+ lock_kernel() ;
1773+ /* very conservative, leave the buffer pinned if anyone might need it.
1774+ ** this should be changed to drop the buffer if it is only in the
1775+ ** current transaction
1776+ */
1777+ if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
1778+ ret = 0 ;
1779+ }
1780+ unlock_kernel() ;
1781+ }
1782+ if (reiserfs_data_ordered(inode->i_sb)) {
1783+ if (buffer_dirty(bh) && bh->b_journal_head) {
1784+ struct reiserfs_journal_list *jl = NULL;
1785+ lock_kernel();
1786+
1787+ /* we can race against fsync_inode_buffers if we aren't careful */
1788+ if (buffer_attached(bh) && buffer_dirty(bh))
1789+ jl = bh->b_journal_head;
1790+
1791+ /* why is this safe?
1792+ * reiserfs_setattr updates i_size in the on disk
1793+ * stat data before allowing vmtruncate to be called.
1794+ *
1795+ * If buffer was put onto the ordered list for this
1796+ * transaction, we know for sure either this transaction
1797+ * or an older one already has updated i_size on disk,
1798+ * and this ordered data won't be referenced in the file
1799+ * if we crash.
1800+ *
1801+ * if the buffer was put onto the ordered list for an older
1802+ * transaction, we need to leave it around
1803+ */
1804+ if (jl != SB_JOURNAL(inode->i_sb)->j_current_jl) {
1805+ ret = 0;
1806+ }
1807+ unlock_kernel();
1808+ }
1809+ }
1810+ return ret ;
1811+}
1812+
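For ordered data, the rule in flushpage_can_drop reduces to: a dirty buffer may be dropped only if it was ordered under the currently running transaction, while anything queued for an older list stays pinned until that list commits. A compressed model of the rule, with plain transaction ids standing in for journal lists:

    #include <stdio.h>

    static int can_drop(int dirty, long buf_trans, long cur_trans)
    {
        if (!dirty)
            return 1;                   /* nothing ordered to protect */
        return buf_trans == cur_trans;  /* older lists must keep it */
    }

    int main(void)
    {
        printf("%d\n", can_drop(1, 41, 42)); /* 0: pinned for older list */
        printf("%d\n", can_drop(1, 42, 42)); /* 1: current list, droppable */
        printf("%d\n", can_drop(0, 41, 42)); /* 1: clean, always droppable */
        return 0;
    }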
1813+/* stolen from fs/buffer.c:discard_bh_page */
1814+static int reiserfs_flushpage(struct page *page, unsigned long offset) {
1815+ struct buffer_head *head, *bh, *next;
1816+ struct inode *inode = page->mapping->host ;
1817+ unsigned int curr_off = 0;
1818+ int ret = 1;
1819+
1820+ if (!PageLocked(page))
1821+ BUG();
1822+ if (!page->buffers)
1823+ return 1;
1824+
1825+ head = page->buffers;
1826+ bh = head;
1827+ do {
1828+ unsigned int next_off = curr_off + bh->b_size;
1829+ next = bh->b_this_page;
1830+
1831+ /* is this buffer to be completely truncated away? */
1832+ if (offset <= curr_off) {
1833+ if (flushpage_can_drop(inode, bh))
1834+ discard_buffer(bh);
1835+ else
1836+ ret = 0 ;
1837+ }
1838+ curr_off = next_off;
1839+ bh = next;
1840+ } while (bh != head);
1841+
1842+ /*
1843+ * subtle. We release buffer-heads only if this is
1844+ * the 'final' flushpage. We have invalidated the get_block
1845+ * cached value unconditionally, so real IO is not
1846+ * possible anymore.
1847+ *
1848+ * If the free doesn't work out, the buffers can be
1849+ * left around - they just turn into anonymous buffers
1850+ * instead.
1851+ */
1852+ if (!offset) {
1853+ if (!ret || !try_to_free_buffers(page, 0))
1854+ return 0;
1855+ if (page->buffers)
1856+ BUG();
1857+ }
1858 return ret ;
1859 }
1860
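The truncation loop above discards exactly those buffers whose first byte sits at or beyond the truncation offset (offset <= curr_off); the buffer straddling the offset is kept. A standalone demonstration of that boundary test for a 4K page of 1K buffers:

    #include <stdio.h>

    int main(void)
    {
        unsigned bsize = 1024, offset = 2048, curr = 0;
        int i;
        for (i = 0; i < 4; i++, curr += bsize)
            printf("buf %d (%u..%u): %s\n", i, curr, curr + bsize - 1,
                   offset <= curr ? "discard" : "keep");
        return 0;
    }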
1861@@ -2222,6 +2675,9 @@
1862 struct kiobuf *iobuf, unsigned long blocknr,
1863 int blocksize)
1864 {
1865+ if (reiserfs_data_ordered(inode->i_sb) || reiserfs_file_data_log(inode)) {
1866+ return -EINVAL;
1867+ }
1868 lock_kernel();
1869 reiserfs_commit_for_tail(inode);
1870 unlock_kernel();
1871@@ -2237,4 +2693,5 @@
1872 commit_write: reiserfs_commit_write,
1873 bmap: reiserfs_aop_bmap,
1874 direct_IO: reiserfs_direct_io,
1875+ flushpage: reiserfs_flushpage,
1876 } ;
1877diff -urN linux-2.4.22.org/fs/reiserfs/ioctl.c linux-2.4.22/fs/reiserfs/ioctl.c
1878--- linux-2.4.22.org/fs/reiserfs/ioctl.c 2003-11-21 15:08:29.000000000 +0100
1879+++ linux-2.4.22/fs/reiserfs/ioctl.c 2003-11-21 15:14:23.000000000 +0100
1880@@ -25,12 +25,22 @@
1881 switch (cmd) {
1882 case REISERFS_IOC_UNPACK:
1883 if( S_ISREG( inode -> i_mode ) ) {
1884- if (arg)
1885- return reiserfs_unpack (inode, filp);
1886- else
1887- return 0;
1888+ if (arg) {
1889+ int result;
1890+ result = reiserfs_unpack (inode, filp);
1891+ if (reiserfs_file_data_log(inode)) {
1892+ struct reiserfs_transaction_handle th;
1893+ lock_kernel();
1894+ journal_begin(&th, inode->i_sb, 1);
1895+ SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
1896+ journal_end_sync(&th, inode->i_sb, 1);
1897+ unlock_kernel();
1898+ }
1899+ return result;
1900+ } else
1901+ return 0;
1901 } else
1902- return -ENOTTY;
1903+ return -ENOTTY;
1904 /*
1905 * Following {G,S}ETFLAGS, and {G,S}ETVERSION are providing ext2
1906 * binary compatible interface (used by lsattr(1), and chattr(1)) and
1907@@ -97,6 +106,7 @@
1908 int retval = 0;
1909 int index ;
1910 struct page *page ;
1911+ struct address_space *mapping ;
1912 unsigned long write_from ;
1913 unsigned long blocksize = inode->i_sb->s_blocksize ;
1914
1915@@ -127,19 +137,20 @@
1916 ** reiserfs_get_block to unpack the tail for us.
1917 */
1918 index = inode->i_size >> PAGE_CACHE_SHIFT ;
1919- page = grab_cache_page(inode->i_mapping, index) ;
1920+ mapping = inode->i_mapping ;
1921+ page = grab_cache_page(mapping, index) ;
1922 retval = -ENOMEM;
1923 if (!page) {
1924 goto out ;
1925 }
1926- retval = reiserfs_prepare_write(NULL, page, write_from, blocksize) ;
1927+ retval = mapping->a_ops->prepare_write(NULL, page, write_from, write_from) ;
1928 if (retval)
1929 goto out_unlock ;
1930
1931 /* conversion can change page contents, must flush */
1932 flush_dcache_page(page) ;
1933 inode->u.reiserfs_i.i_flags |= i_nopack_mask;
1934- kunmap(page) ; /* mapped by prepare_write */
1935+ retval = mapping->a_ops->commit_write(NULL, page, write_from, write_from) ;
1936
1937 out_unlock:
1938 UnlockPage(page) ;
1939diff -urN linux-2.4.22.org/fs/reiserfs/journal.c linux-2.4.22/fs/reiserfs/journal.c
1940--- linux-2.4.22.org/fs/reiserfs/journal.c 2003-11-21 15:08:29.000000000 +0100
1941+++ linux-2.4.22/fs/reiserfs/journal.c 2003-11-21 15:14:23.000000000 +0100
1942@@ -33,17 +33,17 @@
1943 ** -- Note, if you call this as an immediate flush from
1944 ** from within kupdate, it will ignore the immediate flag
1945 **
1946-** The commit thread -- a writer process for async commits. It allows a
1947-** a process to request a log flush on a task queue.
1948-** the commit will happen once the commit thread wakes up.
1949-** The benefit here is the writer (with whatever
1950-** related locks it has) doesn't have to wait for the
1951-** log blocks to hit disk if it doesn't want to.
1952+** The commit thread -- a writer process for metadata and async commits.
1953+** this allows us to do less I/O with the journal lock
1954+** held.
1955 */
1956
1957+#define EXPORT_SYMTAB
1958+#include <linux/module.h>
1959 #include <linux/config.h>
1960 #include <asm/uaccess.h>
1961 #include <asm/system.h>
1962+#include <linux/init.h>
1963
1964 #include <linux/sched.h>
1965 #include <asm/semaphore.h>
1966@@ -59,17 +59,25 @@
1967 #include <linux/string.h>
1968 #include <linux/smp_lock.h>
1969
1970+/* gets a struct reiserfs_journal_list * from a list head */
1971+#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
1972+ j_list))
1973+#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
1974+ j_working_list))
1975+
1976 /* the number of mounted filesystems. This is used to decide when to
1977 ** start and kill the commit thread
1978 */
1979 static int reiserfs_mounted_fs_count = 0 ;
1980
1981-/* wake this up when you add something to the commit thread task queue */
1982+static struct list_head kreiserfsd_supers = LIST_HEAD_INIT(kreiserfsd_supers);
1983+
1984+/* wake this up when you want help from the commit thread */
1985 DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_wait) ;
1986
1987-/* wait on this if you need to be sure you task queue entries have been run */
1988+/* so we can wait for the commit thread to make progress */
1989 static DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_done) ;
1990-DECLARE_TASK_QUEUE(reiserfs_commit_thread_tq) ;
1991+DECLARE_MUTEX(kreiserfsd_sem) ;
1992
1993 #define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit
1994 structs at 4k */
1995@@ -82,6 +90,9 @@
1996
1997 #define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */
1998
1999+/* journal list state bits */
2000+#define LIST_TOUCHED 1
2001+
2002 /* flags for do_journal_end */
2003 #define FLUSH_ALL 1 /* flush commit and real blocks */
2004 #define COMMIT_NOW 2 /* end and commit this transaction */
2005@@ -89,6 +100,9 @@
2006
2007 /* state bits for the journal */
2008 #define WRITERS_BLOCKED 1 /* set when new writers not allowed */
2009+#define WRITERS_QUEUED 2 /* set when log is full due to too many
2010+ * writers
2011+ */
2012
2013 static int do_journal_end(struct reiserfs_transaction_handle *,struct super_block *,unsigned long nblocks,int flags) ;
2014 static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ;
2015@@ -107,7 +121,7 @@
2016 ** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for
2017 ** more details.
2018 */
2019-static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) {
2020+static inline int reiserfs_clean_and_file_buffer(struct buffer_head *bh) {
2021 if (bh) {
2022 clear_bit(BH_Dirty, &bh->b_state) ;
2023 refile_buffer(bh) ;
2024@@ -473,6 +487,8 @@
2025 int pop_journal_writer(int index) {
2026 #ifdef CONFIG_REISERFS_CHECK
2027 if (index >= 0) {
2028+ if (index >= 512)
2029+ BUG();
2030 journal_writers[index] = NULL ;
2031 }
2032 #endif
2033@@ -522,6 +538,12 @@
2034 return 0 ;
2035 }
2036
2037+ /* when data logging is on, no special action is needed for the data
2038+ * blocks
2039+ */
2040+ if (reiserfs_data_log(p_s_sb))
2041+ search_all = 0;
2042+
2043 PROC_INFO_INC( p_s_sb, journal.in_journal );
2044 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
2045 ** if we crash before the transaction that freed it commits, this transaction won't
2046@@ -549,6 +571,7 @@
2047
2048 /* is it in the current transaction. This should never happen */
2049 if ((cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_hash_table, dev,bl,size))) {
2050+ BUG();
2051 return 1;
2052 }
2053
2054@@ -574,17 +597,12 @@
2055 /* lock the current transaction */
2056 inline static void lock_journal(struct super_block *p_s_sb) {
2057 PROC_INFO_INC( p_s_sb, journal.lock_journal );
2058- while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) {
2059- PROC_INFO_INC( p_s_sb, journal.lock_journal_wait );
2060- sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ;
2061- }
2062- atomic_set(&(SB_JOURNAL(p_s_sb)->j_wlock), 1) ;
2063+ down(&SB_JOURNAL(p_s_sb)->j_lock);
2064 }
2065
2066 /* unlock the current transaction */
2067 inline static void unlock_journal(struct super_block *p_s_sb) {
2068- atomic_dec(&(SB_JOURNAL(p_s_sb)->j_wlock)) ;
2069- wake_up(&(SB_JOURNAL(p_s_sb)->j_wait)) ;
2070+ up(&SB_JOURNAL(p_s_sb)->j_lock);
2071 }
2072
2073 /*
2074@@ -602,6 +620,83 @@
2075 jl->j_list_bitmap = NULL ;
2076 }
2077
2078+static int journal_list_still_alive(struct super_block *s,
2079+ unsigned long trans_id)
2080+{
2081+ struct list_head *entry = &SB_JOURNAL(s)->j_journal_list;
2082+ struct reiserfs_journal_list *jl;
2083+
2084+ if (!list_empty(entry)) {
2085+ jl = JOURNAL_LIST_ENTRY(entry->next);
2086+ if (jl->j_trans_id <= trans_id) {
2087+ return 1;
2088+ }
2089+ }
2090+ return 0;
2091+}
2092+
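journal_list_still_alive leans on the invariant that journal lists are freed strictly oldest-first: if the oldest list still on the chain has an id no greater than the one being asked about, that list must still exist. The same check over a plain array of ids:

    #include <stdio.h>

    static long ids[] = { 40, 41, 42 };     /* on-chain lists, oldest first */

    static int still_alive(long trans_id)
    {
        return ids[0] <= trans_id;          /* freed oldest-first */
    }

    int main(void)
    {
        printf("%d %d\n", still_alive(41), still_alive(39));  /* 1 0 */
        return 0;
    }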
2093+static int flush_older_commits(struct super_block *s, struct reiserfs_journal_list *jl) {
2094+ struct reiserfs_journal_list *other_jl;
2095+ struct reiserfs_journal_list *first_jl;
2096+ struct list_head *entry;
2097+ unsigned long trans_id = jl->j_trans_id;
2098+ unsigned long other_trans_id;
2099+ unsigned long first_trans_id;
2100+
2101+find_first:
2102+ /*
2103+ * first we walk backwards to find the oldest uncommitted transaction
2104+ */
2105+ first_jl = jl;
2106+ entry = jl->j_list.prev;
2107+ while(1) {
2108+ other_jl = JOURNAL_LIST_ENTRY(entry);
2109+ if (entry == &SB_JOURNAL(s)->j_journal_list ||
2110+ atomic_read(&other_jl->j_older_commits_done))
2111+ break;
2112+
2113+ first_jl = other_jl;
2114+ entry = other_jl->j_list.prev;
2115+ }
2116+
2117+ /* if we didn't find any older uncommitted transactions, return now */
2118+ if (first_jl == jl) {
2119+ return 0;
2120+ }
2121+
2122+ first_trans_id = first_jl->j_trans_id;
2123+
2124+ entry = &first_jl->j_list;
2125+ while(1) {
2126+ other_jl = JOURNAL_LIST_ENTRY(entry);
2127+ other_trans_id = other_jl->j_trans_id;
2128+
2129+ if (other_trans_id < trans_id) {
2130+ if (atomic_read(&other_jl->j_commit_left) != 0) {
2131+ flush_commit_list(s, other_jl, 0);
2132+
2133+ /* list we were called with is gone, return */
2134+ if (!journal_list_still_alive(s, trans_id))
2135+ return 1;
2136+
2137+ /* the one we just flushed is gone, this means all
2138+ * older lists are also gone, so first_jl is no longer
2139+ * valid either. Go back to the beginning.
2140+ */
2141+ if (!journal_list_still_alive(s, other_trans_id)) {
2142+ goto find_first;
2143+ }
2144+ }
2145+ entry = entry->next;
2146+ if (entry == &SB_JOURNAL(s)->j_journal_list)
2147+ return 0;
2148+ } else {
2149+ return 0;
2150+ }
2151+ }
2152+ return 0;
2153+}
2154+
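Note how flush_older_commits restarts its scan from scratch whenever a flush may have freed lists out from under it, instead of trying to repair stale pointers. The retry shape, reduced to an array of pending flags; flush() here is a stub that pretends one flush invalidates the walk:

    #include <stdio.h>

    static int pending[6] = { 1, 1, 0, 1, 0, 0 };

    /* returns 1 if lists were freed, invalidating the walk */
    static int flush(int i)
    {
        pending[i] = 0;
        return i == 1;          /* pretend flushing list 1 frees others */
    }

    int main(void)
    {
        int target = 3;         /* flush everything older than this */
    restart:
        for (int i = 0; i < target; i++) {
            if (pending[i] && flush(i))
                goto restart;   /* lists changed, rescan */
        }
        printf("all commits older than %d flushed\n", target);
        return 0;
    }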
2155 /*
2156 ** if this journal list still has commit blocks unflushed, send them to disk.
2157 **
2158@@ -611,16 +706,19 @@
2159 */
2160 static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) {
2161 int i, count ;
2162- int index = 0 ;
2163 int bn ;
2164 int retry_count = 0 ;
2165 int orig_commit_left = 0 ;
2166 struct buffer_head *tbh = NULL ;
2167- struct reiserfs_journal_list *other_jl ;
2168+ unsigned long trans_id = jl->j_trans_id;
2169
2170 reiserfs_check_lock_depth("flush_commit_list") ;
2171
2172 if (atomic_read(&jl->j_older_commits_done)) {
2173+ if (!list_empty(&jl->j_ordered_bh_list))
2174+ BUG();
2175+ if (!list_empty(&jl->j_tail_bh_list))
2176+ BUG();
2177 return 0 ;
2178 }
2179
2180@@ -628,50 +726,51 @@
2181 ** us is on disk too
2182 */
2183 if (jl->j_len <= 0) {
2184+ BUG();
2185 return 0 ;
2186 }
2187+ if (trans_id == SB_JOURNAL(s)->j_trans_id)
2188+ BUG();
2189+
2190 if (flushall) {
2191- /* we _must_ make sure the transactions are committed in order. Start with the
2192- ** index after this one, wrap all the way around
2193- */
2194- index = (jl - SB_JOURNAL_LIST(s)) + 1 ;
2195- for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) {
2196- other_jl = SB_JOURNAL_LIST(s) + ( (index + i) % JOURNAL_LIST_COUNT) ;
2197- if (other_jl && other_jl != jl && other_jl->j_len > 0 && other_jl->j_trans_id > 0 &&
2198- other_jl->j_trans_id <= jl->j_trans_id && (atomic_read(&(jl->j_older_commits_done)) == 0)) {
2199- flush_commit_list(s, other_jl, 0) ;
2200- }
2201+ if (flush_older_commits(s, jl) == 1) {
2202+ /* list disappeared during flush_older_commits. return */
2203+ return 0;
2204 }
2205 }
2206
2207 count = 0 ;
2208- /* don't flush the commit list for the current transactoin */
2209- if (jl == ((SB_JOURNAL_LIST(s) + SB_JOURNAL_LIST_INDEX(s)))) {
2210- return 0 ;
2211- }
2212
2213 /* make sure nobody is trying to flush this one at the same time */
2214- if (atomic_read(&(jl->j_commit_flushing))) {
2215- sleep_on(&(jl->j_commit_wait)) ;
2216- if (flushall) {
2217- atomic_set(&(jl->j_older_commits_done), 1) ;
2218- }
2219- return 0 ;
2220+ down(&jl->j_commit_lock);
2221+ if (!journal_list_still_alive(s, trans_id)) {
2222+ up(&jl->j_commit_lock);
2223+ return 0;
2224 }
2225+ if (jl->j_trans_id == 0)
2226+ BUG();
2227
2228 /* this commit is done, exit */
2229 if (atomic_read(&(jl->j_commit_left)) <= 0) {
2230 if (flushall) {
2231 atomic_set(&(jl->j_older_commits_done), 1) ;
2232 }
2233+ if (!list_empty(&jl->j_ordered_bh_list))
2234+ BUG();
2235+ if (!list_empty(&jl->j_tail_bh_list))
2236+ BUG();
2237+ up(&jl->j_commit_lock);
2238 return 0 ;
2239 }
2240- /* keeps others from flushing while we are flushing */
2241- atomic_set(&(jl->j_commit_flushing), 1) ;
2242-
2243
2244+ /* write any buffers that must hit disk before the commit is done */
2245+ while(!list_empty(&jl->j_ordered_bh_list)) {
2246+ unlock_kernel();
2247+ fsync_buffers_list(&jl->j_ordered_bh_list);
2248+ lock_kernel();
2249+ }
2250 if (jl->j_len > SB_JOURNAL_TRANS_MAX(s)) {
2251- reiserfs_panic(s, "journal-512: flush_commit_list: length is %lu, list number %d\n", jl->j_len, jl - SB_JOURNAL_LIST(s)) ;
2252+ reiserfs_panic(s, "journal-512: flush_commit_list: length is %lu, trans_id %lu\n", jl->j_len, jl->j_trans_id) ;
2253 return 0 ;
2254 }
2255
2256@@ -701,7 +800,7 @@
2257 if (buffer_dirty(tbh)) {
2258 reiserfs_warning(s, "journal-569: flush_commit_list, block already dirty!\n") ;
2259 } else {
2260- mark_buffer_dirty(tbh) ;
2261+ atomic_set_buffer_dirty(tbh);
2262 }
2263 ll_rw_block(WRITE, 1, &tbh) ;
2264 count++ ;
2265@@ -745,16 +844,22 @@
2266 atomic_dec(&(jl->j_commit_left)) ;
2267 bforget(jl->j_commit_bh) ;
2268
2269+ if (SB_JOURNAL(s)->j_last_commit_id != 0 &&
2270+ (jl->j_trans_id - SB_JOURNAL(s)->j_last_commit_id) != 1) {
2271+ reiserfs_warning(s, "clm-2200: dev %s, last commit %lu, current %lu\n",
2272+ kdevname(s->s_dev), SB_JOURNAL(s)->j_last_commit_id,
2273+ jl->j_trans_id);
2274+ }
2275+ SB_JOURNAL(s)->j_last_commit_id = jl->j_trans_id;
2276+
2277 /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */
2278 cleanup_freed_for_journal_list(s, jl) ;
2279
2280 if (flushall) {
2281 atomic_set(&(jl->j_older_commits_done), 1) ;
2282 }
2283- atomic_set(&(jl->j_commit_flushing), 0) ;
2284- wake_up(&(jl->j_commit_wait)) ;
2285+ up(&jl->j_commit_lock);
2286
2287- s->s_dirt = 1 ;
2288 return 0 ;
2289 }
2290
2291@@ -853,22 +958,27 @@
2292 ** flush any and all journal lists older than you are
2293 ** can only be called from flush_journal_list
2294 */
2295-static int flush_older_journal_lists(struct super_block *p_s_sb, struct reiserfs_journal_list *jl, unsigned long trans_id) {
2296- int i, index ;
2297- struct reiserfs_journal_list *other_jl ;
2298-
2299- index = jl - SB_JOURNAL_LIST(p_s_sb) ;
2300- for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) {
2301- other_jl = SB_JOURNAL_LIST(p_s_sb) + ((index + i) % JOURNAL_LIST_COUNT) ;
2302- if (other_jl && other_jl->j_len > 0 &&
2303- other_jl->j_trans_id > 0 &&
2304- other_jl->j_trans_id < trans_id &&
2305- other_jl != jl) {
2306- /* do not flush all */
2307- flush_journal_list(p_s_sb, other_jl, 0) ;
2308+static int flush_older_journal_lists(struct super_block *p_s_sb,
2309+ struct reiserfs_journal_list *jl)
2310+{
2311+ struct list_head *entry;
2312+ struct reiserfs_journal_list *other_jl ;
2313+ unsigned long trans_id = jl->j_trans_id;
2314+
2315+ /* we know we are the only ones flushing things, no extra race
2316+ * protection is required.
2317+ */
2318+restart:
2319+ entry = SB_JOURNAL(p_s_sb)->j_journal_list.next;
2320+ other_jl = JOURNAL_LIST_ENTRY(entry);
2321+ if (other_jl->j_trans_id < trans_id) {
2322+ /* do not flush all */
2323+ flush_journal_list(p_s_sb, other_jl, 0) ;
2324+
2325+ /* other_jl is now deleted from the list */
2326+ goto restart;
2327 }
2328- }
2329- return 0 ;
2330+ return 0 ;
2331 }
2332
2333 static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) {
2334@@ -881,14 +991,23 @@
2335 put_bh(bh) ;
2336 }
2337 static void submit_logged_buffer(struct buffer_head *bh) {
2338- lock_buffer(bh) ;
2339 get_bh(bh) ;
2340 bh->b_end_io = reiserfs_end_buffer_io_sync ;
2341 mark_buffer_notjournal_new(bh) ;
2342 clear_bit(BH_Dirty, &bh->b_state) ;
2343+ if (!buffer_uptodate(bh))
2344+ BUG();
2345 submit_bh(WRITE, bh) ;
2346 }
2347
2348+static void del_from_work_list(struct super_block *s,
2349+ struct reiserfs_journal_list *jl) {
2350+ if (!list_empty(&jl->j_working_list)) {
2351+ list_del_init(&jl->j_working_list);
2352+ SB_JOURNAL(s)->j_num_work_lists--;
2353+ }
2354+}
2355+
2356 /* flush a journal list, both commit and real blocks
2357 **
2358 ** always set flushall to 1, unless you are calling from inside
2359@@ -909,29 +1028,27 @@
2360 unsigned long j_len_saved = jl->j_len ;
2361
2362 if (j_len_saved <= 0) {
2363- return 0 ;
2364+ BUG();
2365 }
2366
2367 if (atomic_read(&SB_JOURNAL(s)->j_wcount) != 0) {
2368 reiserfs_warning(s, "clm-2048: flush_journal_list called with wcount %d\n",
2369 atomic_read(&SB_JOURNAL(s)->j_wcount)) ;
2370 }
2371- /* if someone is getting the commit list, we must wait for them */
2372- while (atomic_read(&(jl->j_commit_flushing))) {
2373- sleep_on(&(jl->j_commit_wait)) ;
2374- }
2375- /* if someone is flushing this list, we must wait for them */
2376- while (atomic_read(&(jl->j_flushing))) {
2377- sleep_on(&(jl->j_flush_wait)) ;
2378- }
2379
2380- /* this list is now ours, we can change anything we want */
2381- atomic_set(&(jl->j_flushing), 1) ;
2382+ if (jl->j_trans_id == 0)
2383+ BUG();
2384+
2385+ /* if flushall == 0, the lock is already held */
2386+ if (flushall) {
2387+ down(&SB_JOURNAL(s)->j_flush_sem);
2388+ } else if (!down_trylock(&SB_JOURNAL(s)->j_flush_sem)) {
2389+ BUG();
2390+ }
2391
2392 count = 0 ;
2393 if (j_len_saved > SB_JOURNAL_TRANS_MAX(s)) {
2394- reiserfs_panic(s, "journal-715: flush_journal_list, length is %lu, list number %d\n", j_len_saved, jl - SB_JOURNAL_LIST(s)) ;
2395- atomic_dec(&(jl->j_flushing)) ;
2396+ reiserfs_panic(s, "journal-715: flush_journal_list, length is %lu, transid %lu\n", j_len_saved, jl->j_trans_id) ;
2397 return 0 ;
2398 }
2399
2400@@ -981,13 +1098,13 @@
2401 get_bh(saved_bh) ;
2402
2403 if (buffer_journal_dirty(saved_bh)) {
2404+ if (!can_dirty(cn))
2405+ BUG();
2406 was_jwait = 1 ;
2407- mark_buffer_notjournal_dirty(saved_bh) ;
2408- /* undo the inc from journal_mark_dirty */
2409- put_bh(saved_bh) ;
2410- }
2411- if (can_dirty(cn)) {
2412- was_dirty = 1 ;
2413+ was_dirty = 1;
2414+ } else if (can_dirty(cn)) {
2415+ /* everything with !pjl && jwait should be writable */
2416+ BUG();
2417 }
2418 }
2419
2420@@ -995,7 +1112,8 @@
2421 ** sure they are commited, and don't try writing it to disk
2422 */
2423 if (pjl) {
2424- flush_commit_list(s, pjl, 1) ;
2425+ if (atomic_read(&pjl->j_commit_left))
2426+ flush_commit_list(s, pjl, 1) ;
2427 goto free_cnode ;
2428 }
2429
2430@@ -1029,7 +1147,12 @@
2431 /* we inc again because saved_bh gets decremented at free_cnode */
2432 get_bh(saved_bh) ;
2433 set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ;
2434+ lock_buffer(saved_bh);
2435 submit_logged_buffer(saved_bh) ;
2436+ if (cn->blocknr != saved_bh->b_blocknr) {
2437+printk("cn %lu does not match bh %lu\n", cn->blocknr, saved_bh->b_blocknr);
2438+ BUG();
2439+ }
2440 count++ ;
2441 } else {
2442 reiserfs_warning(s, "clm-2082: Unable to flush buffer %lu in flush_journal_list\n",
2443@@ -1057,9 +1180,23 @@
2444 if (!cn->bh) {
2445 reiserfs_panic(s, "journal-1012: cn->bh is NULL\n") ;
2446 }
2447+ if (cn->blocknr != cn->bh->b_blocknr) {
2448+printk("2cn %lu does not match bh %lu\n", cn->blocknr, cn->bh->b_blocknr);
2449+ BUG();
2450+ }
2451 if (!buffer_uptodate(cn->bh)) {
2452- reiserfs_panic(s, "journal-949: buffer write failed\n") ;
2453+ reiserfs_panic(s, "journal-949: buffer %lu write failed\n", cn->bh->b_blocknr) ;
2454 }
2455+
2456+ /* note, we must clear the JDirty_wait bit after the up to date
2457+ ** check, otherwise we race against our flushpage routine
2458+ */
2459+ if (!test_and_clear_bit(BH_JDirty_wait, &cn->bh->b_state))
2460+ BUG();
2461+
2462+ /* undo the inc from journal_mark_dirty */
2463+ put_bh(cn->bh) ;
2464+
2465 refile_buffer(cn->bh) ;
2466 brelse(cn->bh) ;
2467 }
2468@@ -1074,7 +1211,7 @@
2469 ** replayed after a crash
2470 */
2471 if (flushall) {
2472- flush_older_journal_lists(s, jl, jl->j_trans_id) ;
2473+ flush_older_journal_lists(s, jl);
2474 }
2475
2476 /* before we can remove everything from the hash tables for this
2477@@ -1089,46 +1226,137 @@
2478 update_journal_header_block(s, (jl->j_start + jl->j_len + 2) % SB_ONDISK_JOURNAL_SIZE(s), jl->j_trans_id) ;
2479 }
2480 remove_all_from_journal_list(s, jl, 0) ;
2481+ list_del(&jl->j_list);
2482+ SB_JOURNAL(s)->j_num_lists--;
2483+ del_from_work_list(s, jl);
2484+
2485+ if (SB_JOURNAL(s)->j_last_flush_id != 0 &&
2486+ (jl->j_trans_id - SB_JOURNAL(s)->j_last_flush_id) != 1) {
2487+ reiserfs_warning(s, "clm-2201: dev %s, last flush %lu, current %lu\n",
2488+ kdevname(s->s_dev), SB_JOURNAL(s)->j_last_flush_id,
2489+ jl->j_trans_id);
2490+ }
2491+ SB_JOURNAL(s)->j_last_flush_id = jl->j_trans_id;
2492+
2493+ /* not strictly required since we are freeing the list, but it should
2494+ * help find code using dead lists later on
2495+ */
2496 jl->j_len = 0 ;
2497 atomic_set(&(jl->j_nonzerolen), 0) ;
2498 jl->j_start = 0 ;
2499 jl->j_realblock = NULL ;
2500 jl->j_commit_bh = NULL ;
2501 jl->j_trans_id = 0 ;
2502- atomic_dec(&(jl->j_flushing)) ;
2503- wake_up(&(jl->j_flush_wait)) ;
2504+ jl->j_state = 0;
2505+
2506+ if (!list_empty(&jl->j_ordered_bh_list))
2507+ BUG();
2508+
2509+ if (!list_empty(&jl->j_tail_bh_list))
2510+ BUG();
2511+
2512+ // kmem_cache_free(journal_list_cachep, jl);
2513+ reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s);
2514+
2515+ if (flushall)
2516+ up(&SB_JOURNAL(s)->j_flush_sem);
2517 return 0 ;
2518 }
2519
2520
2521-static int kupdate_one_transaction(struct super_block *s,
2522+#define CHUNK_SIZE 32
2523+struct buffer_chunk {
2524+ struct buffer_head *bh[CHUNK_SIZE];
2525+ int nr;
2526+};
2527+
2528+static void write_chunk(struct buffer_chunk *chunk) {
2529+ int i;
2530+ for (i = 0; i < chunk->nr ; i++) {
2531+ submit_logged_buffer(chunk->bh[i]) ;
2532+ }
2533+ chunk->nr = 0;
2534+}
2535+
2536+static void add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh) {
2537+ if (chunk->nr >= CHUNK_SIZE)
2538+ BUG();
2539+ chunk->bh[chunk->nr++] = bh;
2540+ if (chunk->nr >= CHUNK_SIZE)
2541+ write_chunk(chunk);
2542+}
2543+
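write_chunk/add_to_chunk above form a straightforward batcher: accumulate up to CHUNK_SIZE buffers, auto-submit when full, and let the caller drain whatever is left over. The same pattern in user space, with printf standing in for submit_logged_buffer:

    #include <stdio.h>

    #define CHUNK_SIZE 32

    struct chunk { int items[CHUNK_SIZE]; int nr; };

    static void flush_chunk(struct chunk *c)
    {
        for (int i = 0; i < c->nr; i++)
            printf("submit %d\n", c->items[i]);
        c->nr = 0;
    }

    static void add(struct chunk *c, int item)
    {
        c->items[c->nr++] = item;
        if (c->nr >= CHUNK_SIZE)
            flush_chunk(c);             /* auto-flush when full */
    }

    int main(void)
    {
        struct chunk c = { .nr = 0 };
        for (int i = 0; i < 40; i++)
            add(&c, i);
        if (c.nr)
            flush_chunk(&c);            /* caller drains the remainder */
        return 0;
    }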
2544+static int write_one_transaction(struct super_block *s,
2545+ struct reiserfs_journal_list *jl,
2546+ struct buffer_chunk *chunk)
2547+{
2548+ struct reiserfs_journal_list *pjl ; /* previous list for this cn */
2549+ struct reiserfs_journal_cnode *cn;
2550+ int ret = 0 ;
2551+
2552+ jl->j_state |= LIST_TOUCHED;
2553+ if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
2554+ del_from_work_list(s, jl);
2555+ return 0;
2556+ }
2557+ del_from_work_list(s, jl);
2558+
2559+ cn = jl->j_realblock ;
2560+ while(cn) {
2561+ /* if the blocknr == 0, this has been cleared from the hash,
2562+ ** skip it
2563+ */
2564+ if (cn->blocknr == 0) {
2565+ goto next ;
2566+ }
2567+ /* look for a more recent transaction that logged this
2568+ ** buffer. Only the most recent transaction with a buffer in
2569+ ** it is allowed to send that buffer to disk
2570+ */
2571+ pjl = find_newer_jl_for_cn(cn) ;
2572+ if (!pjl && cn->bh && buffer_journal_dirty(cn->bh) && can_dirty(cn)) {
2573+ if (!test_bit(BH_JPrepared, &cn->bh->b_state)) {
2574+ struct buffer_head *tmp_bh;
2575+ /* we can race against journal_mark_freed when we try
2576+ * to lock_buffer(cn->bh), so we have to inc the buffer
2577+ * count, and recheck things after locking
2578+ */
2579+ tmp_bh = cn->bh;
2580+ get_bh(tmp_bh);
2581+ set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ;
2582+ lock_buffer(tmp_bh);
2583+ if (cn->bh && buffer_journal_dirty(tmp_bh) &&
2584+ !test_bit(BH_JPrepared, &tmp_bh->b_state))
2585+ {
2586+ add_to_chunk(chunk, tmp_bh);
2587+ ret++;
2588+ } else {
2589+ /* note, cn->bh might be null now */
2590+ unlock_buffer(tmp_bh);
2591+ }
2592+ put_bh(tmp_bh);
2593+ }
2594+ }
2595+next:
2596+ cn = cn->next ;
2597+ if (current->need_resched)
2598+ schedule();
2599+ }
2600+ return ret ;
2601+}
2602+
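The find_newer_jl_for_cn test above encodes a newest-owner-wins rule: a block that was logged again by a newer transaction must not be written from an older list, or stale data could overwrite the newer copy on disk. The rule in miniature, with hypothetical block lists:

    #include <stdio.h>

    int main(void)
    {
        long blocks[] = { 100, 101, 102 };  /* blocks in this old list */
        long newer[]  = { 101 };            /* also logged by a newer list */
        int i, j, skip;

        for (i = 0; i < 3; i++) {
            skip = 0;
            for (j = 0; j < 1; j++)
                if (blocks[i] == newer[j])
                    skip = 1;               /* the newer owner writes it */
            printf("block %ld: %s\n", blocks[i], skip ? "skip" : "write");
        }
        return 0;
    }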
2603+static int wait_one_transaction(struct super_block *s,
2604 struct reiserfs_journal_list *jl)
2605 {
2606 struct reiserfs_journal_list *pjl ; /* previous list for this cn */
2607 struct reiserfs_journal_cnode *cn, *walk_cn ;
2608 unsigned long blocknr ;
2609- int run = 0 ;
2610- int orig_trans_id = jl->j_trans_id ;
2611 struct buffer_head *saved_bh ;
2612 int ret = 0 ;
2613
2614- /* if someone is getting the commit list, we must wait for them */
2615- while (atomic_read(&(jl->j_commit_flushing))) {
2616- sleep_on(&(jl->j_commit_wait)) ;
2617- }
2618- /* if someone is flushing this list, we must wait for them */
2619- while (atomic_read(&(jl->j_flushing))) {
2620- sleep_on(&(jl->j_flush_wait)) ;
2621- }
2622- /* was it flushed while we slept? */
2623- if (jl->j_len <= 0 || jl->j_trans_id != orig_trans_id) {
2624- return 0 ;
2625+ if (atomic_read(&jl->j_commit_left) != 0 || jl->j_len <= 0) {
2626+ BUG();
2627 }
2628-
2629- /* this list is now ours, we can change anything we want */
2630- atomic_set(&(jl->j_flushing), 1) ;
2631-
2632-loop_start:
2633 cn = jl->j_realblock ;
2634 while(cn) {
2635 saved_bh = NULL ;
2636@@ -1143,27 +1371,14 @@
2637 ** it is allowed to send that buffer to disk
2638 */
2639 pjl = find_newer_jl_for_cn(cn) ;
2640- if (run == 0 && !pjl && cn->bh && buffer_journal_dirty(cn->bh) &&
2641- can_dirty(cn))
2642- {
2643- if (!test_bit(BH_JPrepared, &cn->bh->b_state)) {
2644- set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ;
2645- submit_logged_buffer(cn->bh) ;
2646- } else {
2647- /* someone else is using this buffer. We can't
2648- ** send it to disk right now because they might
2649- ** be changing/logging it.
2650- */
2651- ret = 1 ;
2652- }
2653- } else if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
2654+ if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
2655 clear_bit(BLOCK_NEEDS_FLUSH, &cn->state) ;
2656 if (!pjl && cn->bh) {
2657 wait_on_buffer(cn->bh) ;
2658- }
2659- /* check again, someone could have logged while we scheduled */
2660- pjl = find_newer_jl_for_cn(cn) ;
2661+ /* check again, someone could have logged while we scheduled */
2662+ pjl = find_newer_jl_for_cn(cn) ;
2663
2664+ }
2665 /* before the JDirty_wait bit is set, the
2666 ** buffer is added to the hash list. So, if we are
2667 ** run in the middle of a do_journal_end, we will notice
2668@@ -1210,60 +1425,182 @@
2669 }
2670 next:
2671 cn = cn->next ;
2672+ if (current->need_resched)
2673+ schedule();
2674 }
2675- /* the first run through the loop sends all the dirty buffers to
2676- ** ll_rw_block.
2677- ** the second run through the loop does all the accounting
2678- */
2679- if (run++ == 0) {
2680- goto loop_start ;
2681+ return ret ;
2682+}
2683+
2684+static int kupdate_transactions(struct super_block *s,
2685+ struct reiserfs_journal_list *jl,
2686+ struct reiserfs_journal_list **next_jl,
2687+ unsigned long *next_trans_id,
2688+ int num_blocks,
2689+ int num_trans) {
2690+ int ret = 0;
2691+ int written = 0 ;
2692+ int transactions_flushed = 0;
2693+ unsigned long orig_trans_id = jl->j_trans_id;
2694+ struct reiserfs_journal_list *orig_jl = jl;
2695+ struct buffer_chunk chunk;
2696+ struct list_head *entry;
2697+ chunk.nr = 0;
2698+
2699+ down(&SB_JOURNAL(s)->j_flush_sem);
2700+ if (!journal_list_still_alive(s, orig_trans_id)) {
2701+ goto done;
2702 }
2703
2704- atomic_set(&(jl->j_flushing), 0) ;
2705- wake_up(&(jl->j_flush_wait)) ;
2706- return ret ;
2707+ /* we've got j_flush_sem held, nobody is going to delete any
2708+ * of these lists out from underneath us
2709+ */
2710+ while((num_trans && transactions_flushed < num_trans) ||
2711+ (!num_trans && written < num_blocks)) {
2712+
2713+ if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
2714+ atomic_read(&jl->j_commit_left))
2715+ {
2716+ del_from_work_list(s, jl);
2717+ break;
2718+ }
2719+ ret = write_one_transaction(s, jl, &chunk);
2720+
2721+ if (ret < 0)
2722+ goto done;
2723+ transactions_flushed++;
2724+ written += ret;
2725+ entry = jl->j_list.next;
2726+
2727+ /* did we wrap? */
2728+ if (entry == &SB_JOURNAL(s)->j_journal_list) {
2729+ break;
2730+ }
2731+ jl = JOURNAL_LIST_ENTRY(entry);
2732+
2733+ /* don't bother with older transactions */
2734+ if (jl->j_trans_id <= orig_trans_id)
2735+ break;
2736+ }
2737+ if (chunk.nr) {
2738+ write_chunk(&chunk);
2739+ }
2740+
2741+ jl = orig_jl;
2742+ *next_jl = jl;
2743+ *next_trans_id = jl->j_trans_id;
2744+ ret = transactions_flushed;
2745+ while(transactions_flushed--) {
2746+
2747+ wait_one_transaction(s, jl);
2748+ entry = jl->j_list.next;
2749+ jl = JOURNAL_LIST_ENTRY(entry);
2750+
2751+ /* make sure we can really count */
2752+ if (jl->j_trans_id <= orig_trans_id && transactions_flushed > 0) {
2753+printk("flushing %s %lu, orig_trans_id was %lu\n", kdevname(s->s_dev), jl->j_trans_id, orig_trans_id);
2754+ BUG();
2755+ }
2756+ *next_jl = jl;
2757+ *next_trans_id = jl->j_trans_id;
2758+ }
2759+
2760+done:
2761+ up(&SB_JOURNAL(s)->j_flush_sem);
2762+ return ret;
2763 }
2764+
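kupdate_transactions is deliberately two-phase: the first pass submits writes across several lists, and the second waits on exactly as many transactions as were submitted, so the waiting overlaps the in-flight I/O instead of serializing list by list. The shape of that split, with stub I/O calls:

    #include <stdio.h>

    static void start_io(int t) { printf("start %d\n", t); }
    static void wait_io(int t)  { printf("wait  %d\n", t); }

    int main(void)
    {
        int first = 5, count = 0, t;

        for (t = first; t < first + 3; t++) {   /* phase 1: submit all */
            start_io(t);
            count++;
        }
        for (t = first; count--; t++)           /* phase 2: wait on each */
            wait_io(t);
        return 0;
    }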
2765+/* O_SYNC and fsync heavy applications tend to use up
2766+** all the journal list slots with tiny transactions. These
2767+** trigger lots and lots of calls to update the header block, which
2768+** adds seeks and slows things down.
2769+**
2770+** This function tries to clear out a large chunk of the journal lists
2771+** at once, which makes everything faster since only the newest journal
2772+** list updates the header block
2773+*/
2774+static int flush_used_journal_lists(struct super_block *s,
2775+ struct reiserfs_journal_list *jl) {
2776+ unsigned long len = 0;
2777+ unsigned long cur_len;
2778+ int ret;
2779+ int i;
2780+ struct reiserfs_journal_list *tjl;
2781+ struct reiserfs_journal_list *flush_jl;
2782+ unsigned long trans_id;
2783+
2784+ flush_jl = tjl = jl;
2785+
2786+ /* flush for 256 transactions or 256 blocks, whichever comes first */
2787+ for(i = 0 ; i < 256 && len < 256 ; i++) {
2788+ if (atomic_read(&tjl->j_commit_left) ||
2789+ tjl->j_trans_id < jl->j_trans_id) {
2790+ break;
2791+ }
2792+ cur_len = atomic_read(&tjl->j_nonzerolen);
2793+ if (cur_len > 0) {
2794+ tjl->j_state &= ~LIST_TOUCHED;
2795+ }
2796+ len += cur_len;
2797+ flush_jl = tjl;
2798+ if (tjl->j_list.next == &SB_JOURNAL(s)->j_journal_list)
2799+ break;
2800+ tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
2801+ }
2802+ /* try to find a group of blocks we can flush across all the
2803+ ** transactions, but only bother if we've actually spanned
2804+ ** across multiple lists
2805+ */
2806+ if (flush_jl != jl) {
2807+ ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
2808+ }
2809+ flush_journal_list(s, flush_jl, 1) ;
2810+ return 0;
2811+}
2812+
2813+
2814 /* since we never give dirty buffers to bdflush/kupdate, we have to
2815 ** flush them ourselves. This runs through the journal lists, finds
2816 ** old metadata in need of flushing and sends it to disk.
2817 ** this does not end transactions, commit anything, or free
2818 ** cnodes.
2819-**
2820-** returns the highest transaction id that was flushed last time
2821 */
2822 static unsigned long reiserfs_journal_kupdate(struct super_block *s) {
2823- struct reiserfs_journal_list *jl ;
2824- int i ;
2825- int start ;
2826+ struct reiserfs_journal_list *jl, *next_jl;
2827+ unsigned long trans_id, next_trans_id;
2828 time_t age ;
2829- int ret = 0 ;
2830
2831- start = SB_JOURNAL_LIST_INDEX(s) ;
2832+ jl = JOURNAL_WORK_ENTRY(SB_JOURNAL(s)->j_working_list.next);
2833
2834- /* safety check to prevent flush attempts during a mount */
2835- if (start < 0) {
2836+restart:
2837+ /* kupdate transactions might not set next_trans_id, it must be
2838+ * initialized before each call
2839+ */
2840+ next_trans_id = 0;
2841+ if (list_empty(&SB_JOURNAL(s)->j_working_list)) {
2842 return 0 ;
2843 }
2844- i = (start + 1) % JOURNAL_LIST_COUNT ;
2845- while(i != start) {
2846- jl = SB_JOURNAL_LIST(s) + i ;
2847- age = CURRENT_TIME - jl->j_timestamp ;
2848- if (jl->j_len > 0 && // age >= (JOURNAL_MAX_COMMIT_AGE * 2) &&
2849- atomic_read(&(jl->j_nonzerolen)) > 0 &&
2850- atomic_read(&(jl->j_commit_left)) == 0) {
2851+ trans_id = jl->j_trans_id;
2852
2853- if (jl->j_trans_id == SB_JOURNAL(s)->j_trans_id) {
2854- break ;
2855- }
2856- /* if ret was already 1, we want to preserve that */
2857- ret |= kupdate_one_transaction(s, jl) ;
2858- }
2859- if (atomic_read(&(jl->j_nonzerolen)) > 0) {
2860- ret |= 1 ;
2861- }
2862- i = (i + 1) % JOURNAL_LIST_COUNT ;
2863+ /* check for race with the code that frees lists */
2864+ if (jl->j_trans_id == 0)
2865+ BUG();
2866+ age = CURRENT_TIME - jl->j_timestamp ;
2867+ if (age >= SB_JOURNAL_MAX_COMMIT_AGE(s) &&
2868+ atomic_read(&jl->j_nonzerolen) > 0 &&
2869+ atomic_read(&jl->j_commit_left) == 0)
2870+ {
2871+ if (kupdate_transactions(s, jl, &next_jl, &next_trans_id, 32, 32) < 0)
2872+ return 0;
2873+ if (next_jl != JOURNAL_WORK_ENTRY(&SB_JOURNAL(s)->j_working_list) &&
2874+ next_trans_id > trans_id)
2875+ {
2876+ if (journal_list_still_alive(s, next_trans_id)) {
2877+ jl = next_jl;
2878+ goto restart;
2879+ }
2880+ }
2881 }
2882- return ret ;
2883+ return 0;
2884 }
2885
2886 /*
2887@@ -1307,6 +1644,12 @@
2888 }
2889
2890 static void free_journal_ram(struct super_block *p_s_sb) {
2891+
2892+ // kmem_cache_free(journal_list_cachep, SB_JOURNAL(p_s_sb)->j_current_jl);
2893+ reiserfs_kfree(SB_JOURNAL(p_s_sb)->j_current_jl,
2894+ sizeof(struct reiserfs_journal_list), p_s_sb);
2895+ SB_JOURNAL(p_s_sb)->j_num_lists--;
2896+
2897 vfree(SB_JOURNAL(p_s_sb)->j_cnode_free_orig) ;
2898 free_list_bitmaps(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_bitmap) ;
2899 free_bitmap_nodes(p_s_sb) ; /* must be after free_list_bitmaps */
2900@@ -1327,6 +1670,10 @@
2901 static int do_journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, int error) {
2902 struct reiserfs_transaction_handle myth ;
2903
2904+ down(&kreiserfsd_sem);
2905+ list_del(&p_s_sb->u.reiserfs_sb.s_reiserfs_supers);
2906+ up(&kreiserfsd_sem);
2907+
2908 /* we only want to flush out transactions if we were called with error == 0
2909 */
2910 if (!error && !(p_s_sb->s_flags & MS_RDONLY)) {
2911@@ -1813,66 +2160,6 @@
2912 return 0 ;
2913 }
2914
2915-
2916-struct reiserfs_journal_commit_task {
2917- struct super_block *p_s_sb ;
2918- int jindex ;
2919- int wake_on_finish ; /* if this is one, we wake the task_done queue, if it
2920- ** is zero, we free the whole struct on finish
2921- */
2922- struct reiserfs_journal_commit_task *self ;
2923- struct wait_queue *task_done ;
2924- struct tq_struct task ;
2925-} ;
2926-
2927-static void reiserfs_journal_commit_task_func(struct reiserfs_journal_commit_task *ct) {
2928-
2929- struct reiserfs_journal_list *jl ;
2930- jl = SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex ;
2931-
2932- flush_commit_list(ct->p_s_sb, SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex, 1) ;
2933-
2934- if (jl->j_len > 0 && atomic_read(&(jl->j_nonzerolen)) > 0 &&
2935- atomic_read(&(jl->j_commit_left)) == 0) {
2936- kupdate_one_transaction(ct->p_s_sb, jl) ;
2937- }
2938- reiserfs_kfree(ct->self, sizeof(struct reiserfs_journal_commit_task), ct->p_s_sb) ;
2939-}
2940-
2941-static void setup_commit_task_arg(struct reiserfs_journal_commit_task *ct,
2942- struct super_block *p_s_sb,
2943- int jindex) {
2944- if (!ct) {
2945- reiserfs_panic(NULL, "journal-1360: setup_commit_task_arg called with NULL struct\n") ;
2946- }
2947- ct->p_s_sb = p_s_sb ;
2948- ct->jindex = jindex ;
2949- ct->task_done = NULL ;
2950- INIT_LIST_HEAD(&ct->task.list) ;
2951- ct->task.sync = 0 ;
2952- ct->task.routine = (void *)(void *)reiserfs_journal_commit_task_func ;
2953- ct->self = ct ;
2954- ct->task.data = (void *)ct ;
2955-}
2956-
2957-static void commit_flush_async(struct super_block *p_s_sb, int jindex) {
2958- struct reiserfs_journal_commit_task *ct ;
2959- /* using GFP_NOFS, GFP_KERNEL could try to flush inodes, which will try
2960- ** to start/join a transaction, which will deadlock
2961- */
2962- ct = reiserfs_kmalloc(sizeof(struct reiserfs_journal_commit_task), GFP_NOFS, p_s_sb) ;
2963- if (ct) {
2964- setup_commit_task_arg(ct, p_s_sb, jindex) ;
2965- queue_task(&(ct->task), &reiserfs_commit_thread_tq);
2966- wake_up(&reiserfs_commit_thread_wait) ;
2967- } else {
2968-#ifdef CONFIG_REISERFS_CHECK
2969- reiserfs_warning(p_s_sb, "journal-1540: kmalloc failed, doing sync commit\n") ;
2970-#endif
2971- flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1) ;
2972- }
2973-}
2974-
2975 /*
2976 ** this is the commit thread. It is started with kernel_thread on
2977 ** FS mount, and journal_release() waits for it to exit.
2978@@ -1885,6 +2172,9 @@
2979 ** then run the per filesystem commit task queue when we wakeup.
2980 */
2981 static int reiserfs_journal_commit_thread(void *nullp) {
2982+ struct list_head *entry, *safe ;
2983+ struct super_block *s;
2984+ time_t last_run = 0;
2985
2986 daemonize() ;
2987
2988@@ -1897,13 +2187,73 @@
2989 lock_kernel() ;
2990 while(1) {
2991
2992- while(TQ_ACTIVE(reiserfs_commit_thread_tq)) {
2993- run_task_queue(&reiserfs_commit_thread_tq) ;
2994+restart:
2995+ down(&kreiserfsd_sem);
2996+ list_for_each_safe(entry, safe, &kreiserfsd_supers) {
2997+ s = list_entry(entry, struct super_block,
2998+ u.reiserfs_sb.s_reiserfs_supers);
2999+ if (!(s->s_flags & MS_RDONLY)) {
3000+ flush_async_commits(s);
3001+
3002+ if (CURRENT_TIME - last_run > 5) {
3003+ reiserfs_flush_old_commits(s);
3004+ }
3005+
3006+ if (!list_empty(&SB_JOURNAL(s)->j_working_list)) {
3007+ struct reiserfs_journal_list *jl, *tjl;
3008+ unsigned long trans_id ;
3009+ unsigned long start;
3010+ unsigned long cur_start;
3011+ unsigned long nfract = SB_ONDISK_JOURNAL_SIZE(s) / 4;
3012+ int ret;
3013+
3014+ jl = JOURNAL_WORK_ENTRY(SB_JOURNAL(s)->j_working_list.next);
3015+ cur_start = SB_JOURNAL(s)->j_start;
3016+ start = jl->j_start;
3017+
3018+ /* pretend the log doesn't actually wrap */
3019+ if (cur_start < start) {
3020+ cur_start = cur_start + SB_ONDISK_JOURNAL_SIZE(s);
3021+ }
3022+
3023+ /* if the first transaction on the working list is more
3024+ * than nfract blocks away from the current transaction start
3025+ * or there are more than 32 working lists, start
3026+ * a background flush
3027+ */
3028+ if (cur_start - start > nfract ||
3029+ SB_JOURNAL(s)->j_num_work_lists > 32) {
3030+ tjl=JOURNAL_LIST_ENTRY(SB_JOURNAL(s)->j_journal_list.next);
3031+ ret = kupdate_transactions(s, jl, &tjl, &trans_id,32,128);
3032+ }
3033+ }
3034+ }
3035 }
3036+ /* check again for new async commits that need tending */
3037+ list_for_each_safe(entry, safe, &kreiserfsd_supers) {
3038+ s = list_entry(entry, struct super_block,
3039+ u.reiserfs_sb.s_reiserfs_supers);
3040+ if (!list_empty(&SB_JOURNAL(s)->j_journal_list)) {
3041+ struct reiserfs_journal_list *jl;
3042+ struct list_head *entry;
3043+
3044+ /* last entry is the youngest, commit it and you get everything */
3045+ entry = SB_JOURNAL(s)->j_journal_list.prev;
3046+ jl = JOURNAL_LIST_ENTRY(entry);
3047+ if (!atomic_read(&(jl->j_older_commits_done))) {
3048+ /* give new mounts a chance to come in */
3049+ up(&kreiserfsd_sem);
3050+ last_run = CURRENT_TIME;
3051+ wake_up_all(&reiserfs_commit_thread_done) ;
3052+ goto restart;
3053+ }
3054+ }
3055+ }
3056+ up(&kreiserfsd_sem);
3057+ last_run = CURRENT_TIME;
3058
3059 /* if there aren't any more filesystems left, break */
3060 if (reiserfs_mounted_fs_count <= 0) {
3061- run_task_queue(&reiserfs_commit_thread_tq) ;
3062 break ;
3063 }
3064 wake_up(&reiserfs_commit_thread_done) ;
3065@@ -1914,12 +2264,28 @@
3066 return 0 ;
3067 }
3068
3069+static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
3070+{
3071+ struct reiserfs_journal_list *jl;
3072+retry:
3073+ // jl = (struct reiserfs_journal_list *)kmem_cache_alloc(journal_list_cachep, SLAB_NOFS);
3074+ jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS, s);
3075+ if (!jl) {
3076+ yield();
3077+ goto retry;
3078+ }
3079+ memset(jl, 0, sizeof(*jl));
3080+ INIT_LIST_HEAD(&jl->j_list);
3081+ INIT_LIST_HEAD(&jl->j_working_list);
3082+ INIT_LIST_HEAD(&jl->j_ordered_bh_list);
3083+ INIT_LIST_HEAD(&jl->j_tail_bh_list);
3084+ sema_init(&jl->j_commit_lock, 1);
3085+ SB_JOURNAL(s)->j_num_lists++;
3086+ return jl;
3087+}
3088+
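alloc_journal_list never returns NULL: it yields the CPU and retries until the allocation succeeds, on the theory that running transactions will eventually free memory. The same retry idiom in plain C, with a fake allocator standing in for reiserfs_kmalloc:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sched.h>

    static void *try_alloc(size_t n)
    {
        static int fails = 2;           /* pretend the first tries fail */
        return fails-- > 0 ? NULL : malloc(n);
    }

    int main(void)
    {
        void *p;
        while ((p = try_alloc(128)) == NULL)
            sched_yield();              /* let others make progress */
        printf("allocated after retries\n");
        free(p);
        return 0;
    }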
3089 static void journal_list_init(struct super_block *p_s_sb) {
3090- int i ;
3091- for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) {
3092- init_waitqueue_head(&(SB_JOURNAL_LIST(p_s_sb)[i].j_commit_wait)) ;
3093- init_waitqueue_head(&(SB_JOURNAL_LIST(p_s_sb)[i].j_flush_wait)) ;
3094- }
3095+ SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb);
3096 }
3097
3098 static int release_journal_dev( struct super_block *super,
3099@@ -1952,7 +2318,6 @@
3100 int blkdev_mode = FMODE_READ | FMODE_WRITE;
3101
3102 result = 0;
3103-
3104 journal -> j_dev_bd = NULL;
3105 journal -> j_dev_file = NULL;
3106 jdev = SB_JOURNAL_DEV( super ) =
3107@@ -2030,7 +2395,6 @@
3108 printk( "journal_init_dev: journal device: %s", kdevname( SB_JOURNAL_DEV( super ) ) );
3109 return result;
3110 }
3111-
3112 /*
3113 ** must be called once on fs mount. calls journal_read for you
3114 */
3115@@ -2041,6 +2405,7 @@
3116 struct reiserfs_super_block * rs;
3117 struct reiserfs_journal_header *jh;
3118 struct reiserfs_journal *journal;
3119+ struct reiserfs_journal_list *jl;
3120
3121 if (sizeof(struct reiserfs_journal_commit) != 4096 ||
3122 sizeof(struct reiserfs_journal_desc) != 4096) {
3123@@ -2054,7 +2419,6 @@
3124 reiserfs_warning(p_s_sb, "Journal size %d is less than 512+1 blocks, which unsupported\n", SB_ONDISK_JOURNAL_SIZE(p_s_sb));
3125 return 1 ;
3126 }
3127-
3128 journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof (struct reiserfs_journal)) ;
3129 if (!journal) {
3130 reiserfs_warning(p_s_sb, "journal-1256: unable to get memory for journal structure\n") ;
3131@@ -2155,15 +2519,9 @@
3132 SB_JOURNAL_MAX_BATCH(p_s_sb) = SB_JOURNAL_TRANS_MAX(p_s_sb)*9 / 10;
3133 }
3134 }
3135-
3136 brelse (bhjh);
3137
3138 SB_JOURNAL(p_s_sb)->j_list_bitmap_index = 0 ;
3139- SB_JOURNAL_LIST_INDEX(p_s_sb) = -10000 ; /* make sure flush_old_commits does not try to flush a list while replay is on */
3140-
3141- /* clear out the journal list array */
3142- memset(SB_JOURNAL_LIST(p_s_sb), 0,
3143- sizeof(struct reiserfs_journal_list) * JOURNAL_LIST_COUNT) ;
3144
3145 journal_list_init(p_s_sb) ;
3146
3147@@ -2171,8 +2529,6 @@
3148 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ;
3149 memset(journal_writers, 0, sizeof(char *) * 512) ; /* debug code */
3150
3151- INIT_LIST_HEAD(&(SB_JOURNAL(p_s_sb)->j_dirty_buffers)) ;
3152-
3153 SB_JOURNAL(p_s_sb)->j_start = 0 ;
3154 SB_JOURNAL(p_s_sb)->j_len = 0 ;
3155 SB_JOURNAL(p_s_sb)->j_len_alloc = 0 ;
3156@@ -2182,13 +2538,15 @@
3157 SB_JOURNAL(p_s_sb)->j_last = NULL ;
3158 SB_JOURNAL(p_s_sb)->j_first = NULL ;
3159 init_waitqueue_head(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
3160- init_waitqueue_head(&(SB_JOURNAL(p_s_sb)->j_wait)) ;
3161-
3162+ sema_init(&SB_JOURNAL(p_s_sb)->j_lock, 1);
3163+ sema_init(&SB_JOURNAL(p_s_sb)->j_flush_sem, 1);
3164+ INIT_LIST_HEAD (&SB_JOURNAL(p_s_sb)->j_journal_list);
3165+ INIT_LIST_HEAD (&SB_JOURNAL(p_s_sb)->j_working_list);
3166+
3167 SB_JOURNAL(p_s_sb)->j_trans_id = 10 ;
3168 SB_JOURNAL(p_s_sb)->j_mount_id = 10 ;
3169 SB_JOURNAL(p_s_sb)->j_state = 0 ;
3170 atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ;
3171- atomic_set(&(SB_JOURNAL(p_s_sb)->j_wlock), 0) ;
3172 SB_JOURNAL(p_s_sb)->j_cnode_free_list = allocate_cnodes(num_cnodes) ;
3173 SB_JOURNAL(p_s_sb)->j_cnode_free_orig = SB_JOURNAL(p_s_sb)->j_cnode_free_list ;
3174 SB_JOURNAL(p_s_sb)->j_cnode_free = SB_JOURNAL(p_s_sb)->j_cnode_free_list ?
3175@@ -2196,8 +2554,9 @@
3176 SB_JOURNAL(p_s_sb)->j_cnode_used = 0 ;
3177 SB_JOURNAL(p_s_sb)->j_must_wait = 0 ;
3178 init_journal_hash(p_s_sb) ;
3179- SB_JOURNAL_LIST(p_s_sb)[0].j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL_LIST(p_s_sb)) ;
3180- if (!(SB_JOURNAL_LIST(p_s_sb)[0].j_list_bitmap)) {
3181+ jl = SB_JOURNAL(p_s_sb)->j_current_jl;
3182+ jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl) ;
3183+ if (!jl->j_list_bitmap) {
3184 reiserfs_warning(p_s_sb, "journal-2005, get_list_bitmap failed for journal list 0\n") ;
3185 goto free_and_return;
3186 }
3187@@ -2205,8 +2564,6 @@
3188 reiserfs_warning(p_s_sb, "Replay Failure, unable to mount\n") ;
3189 goto free_and_return;
3190 }
3191- /* once the read is done, we can set this where it belongs */
3192- SB_JOURNAL_LIST_INDEX(p_s_sb) = 0 ;
3193
3194 if (reiserfs_dont_log (p_s_sb))
3195 return 0;
3196@@ -2216,6 +2573,9 @@
3197 kernel_thread((void *)(void *)reiserfs_journal_commit_thread, NULL,
3198 CLONE_FS | CLONE_FILES | CLONE_VM) ;
3199 }
3200+ down(&kreiserfsd_sem);
3201+ list_add(&p_s_sb->u.reiserfs_sb.s_reiserfs_supers, &kreiserfsd_supers);
3202+ up(&kreiserfsd_sem);
3203 return 0 ;
3204
3205 free_and_return:
3206@@ -2230,7 +2590,9 @@
3207 */
3208 int journal_transaction_should_end(struct reiserfs_transaction_handle *th, int new_alloc) {
3209 time_t now = CURRENT_TIME ;
3210- if (reiserfs_dont_log(th->t_super))
3211+
3212+ /* cannot restart while nested unless the parent allows it */
3213+ if (!reiserfs_restartable_handle(th) && th->t_refcount > 1)
3214 return 0 ;
3215 if ( SB_JOURNAL(th->t_super)->j_must_wait > 0 ||
3216 (SB_JOURNAL(th->t_super)->j_len_alloc + new_alloc) >= SB_JOURNAL_MAX_BATCH(th->t_super) ||
3217@@ -2239,9 +2601,48 @@
3218 SB_JOURNAL(th->t_super)->j_cnode_free < (SB_JOURNAL_TRANS_MAX(th->t_super) * 3)) {
3219 return 1 ;
3220 }
3221+
3222+ /* we are allowing them to continue in the current transaction, so
3223+ * we have to bump the blocks allocated now.
3224+ */
3225+ th->t_blocks_allocated += new_alloc;
3226+ SB_JOURNAL(th->t_super)->j_len_alloc += new_alloc;
3227+
3228 return 0 ;
3229 }
3230
3231+int
3232+reiserfs_restart_transaction(struct reiserfs_transaction_handle *th, int num) {
3233+ int refcount = th->t_refcount ;
3234+ struct super_block *s = th->t_super ;
3235+ int flags = th->t_flags ;
3236+ int parent_flags = 0;
3237+ struct reiserfs_transaction_handle *saved_th = current->journal_info ;
3238+
3239+ /* if refcount is > 1, saved_th is the parent we've nested into, save
3240+ ** its flags as well. So far, only intermezzo needs this, 99% of the
3241+ ** time it is horribly unsafe.
3242+ */
3243+ if (refcount > 1) {
3244+ if (!reiserfs_restartable_handle(saved_th)) {
3245+ BUG() ;
3246+ }
3247+ th->t_refcount = 1;
3248+ parent_flags = saved_th->t_flags ;
3249+ }
3250+ th->t_flags = 0 ;
3251+ journal_end(th, s, th->t_blocks_allocated) ;
3252+ journal_begin(th, s, num) ;
3253+ th->t_flags = flags;
3254+ if (refcount > 1) {
3255+ current->journal_info = saved_th ;
3256+ th->t_refcount = refcount ;
3257+ memcpy(saved_th, th, sizeof(*th)) ;
3258+ saved_th->t_flags = parent_flags ;
3259+ }
3260+ return 0 ;
3261+}
3262+
3263 /* this must be called inside a transaction, and requires the
3264 ** kernel_lock to be held
3265 */
3266@@ -2268,6 +2669,37 @@
3267 !test_bit(WRITERS_BLOCKED, &SB_JOURNAL(s)->j_state)) ;
3268 }
3269
3270+static void queue_log_writer(struct super_block *s) {
3271+ set_bit(WRITERS_QUEUED, &SB_JOURNAL(s)->j_state);
3272+ sleep_on(&SB_JOURNAL(s)->j_join_wait);
3273+}
3274+
3275+static void wake_queued_writers(struct super_block *s) {
3276+ if (test_and_clear_bit(WRITERS_QUEUED, &SB_JOURNAL(s)->j_state)) {
3277+ wake_up(&SB_JOURNAL(s)->j_join_wait);
3278+ }
3279+}
3280+
3281+static void let_transaction_grow(struct super_block *sb,
3282+ unsigned long trans_id)
3283+{
3284+ unsigned long bcount = SB_JOURNAL(sb)->j_bcount;
3285+ while(1) {
3286+ yield();
3287+ while ((atomic_read(&SB_JOURNAL(sb)->j_wcount) > 0 ||
3288+ atomic_read(&SB_JOURNAL(sb)->j_jlock)) &&
3289+ SB_JOURNAL(sb)->j_trans_id == trans_id) {
3290+ queue_log_writer(sb);
3291+ }
3292+ if (SB_JOURNAL(sb)->j_trans_id != trans_id)
3293+ break;
3294+ if (bcount == SB_JOURNAL(sb)->j_bcount)
3295+ break;
3296+ bcount = SB_JOURNAL(sb)->j_bcount;
3297+ }
3298+}
3299+
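let_transaction_grow keeps waiting only while there is forward progress: it stops once the transaction id moves on, or once j_bcount stalls, meaning no new writers are joining and further waiting is pointless. A reduced model of that double exit condition:

    #include <stdio.h>

    int main(void)
    {
        long trans = 7, bcount = 100, seen = bcount;
        int step;

        for (step = 0; ; step++) {
            if (step < 2)
                bcount++;               /* pretend writers join twice */
            if (trans != 7)
                break;                  /* transaction ended */
            if (bcount == seen)
                break;                  /* no forward progress, give up */
            seen = bcount;
        }
        printf("stopped waiting at bcount=%ld\n", bcount);
        return 0;
    }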
3300+
3301 /* join == true if you must join an existing transaction.
3302 ** join == false if you can deal with waiting for others to finish
3303 **
3304@@ -2275,8 +2707,10 @@
3305 ** expect to use in nblocks.
3306 */
3307 static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb,unsigned long nblocks,int join) {
3308- time_t now = CURRENT_TIME ;
3309+ time_t now ;
3310 int old_trans_id ;
3311+ struct reiserfs_transaction_handle myth ;
3312+ int sched_count = 0;
3313
3314 reiserfs_check_lock_depth("journal_begin") ;
3315 RFALSE( p_s_sb->s_flags & MS_RDONLY,
3316@@ -2287,9 +2721,14 @@
3317 return 0 ;
3318 }
3319 PROC_INFO_INC( p_s_sb, journal.journal_being );
3320+ /* set here for journal_join */
3321+ th->t_refcount = 1;
3322+ th->t_flags = 0 ;
3323+ th->t_super = p_s_sb ;
3324
3325 relock:
3326 lock_journal(p_s_sb) ;
3327+ SB_JOURNAL(p_s_sb)->j_bcount++ ;
3328
3329 if (test_bit(WRITERS_BLOCKED, &SB_JOURNAL(p_s_sb)->j_state)) {
3330 unlock_journal(p_s_sb) ;
3331@@ -2297,12 +2736,12 @@
3332 PROC_INFO_INC( p_s_sb, journal.journal_relock_writers );
3333 goto relock ;
3334 }
3335+ now = CURRENT_TIME;
3336
3337 /* if there is no room in the journal OR
3338 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning
3339 ** we don't sleep if there aren't other writers
3340 */
3341-
3342 if ( (!join && SB_JOURNAL(p_s_sb)->j_must_wait > 0) ||
3343 ( !join && (SB_JOURNAL(p_s_sb)->j_len_alloc + nblocks + 2) >= SB_JOURNAL_MAX_BATCH(p_s_sb)) ||
3344 (!join && atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) > 0 && SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 &&
3345@@ -2310,54 +2749,128 @@
3346 (!join && atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) ) ||
3347 (!join && SB_JOURNAL(p_s_sb)->j_cnode_free < (SB_JOURNAL_TRANS_MAX(p_s_sb) * 3))) {
3348
3349+ old_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ;
3350 unlock_journal(p_s_sb) ; /* allow others to finish this transaction */
3351
3352- /* if writer count is 0, we can just force this transaction to end, and start
3353- ** a new one afterwards.
3354- */
3355- if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0) {
3356- struct reiserfs_transaction_handle myth ;
3357- journal_join(&myth, p_s_sb, 1) ;
3358- reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
3359- journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
3360- do_journal_end(&myth, p_s_sb,1,COMMIT_NOW) ;
3361+ if (!join && (SB_JOURNAL(p_s_sb)->j_len_alloc + nblocks + 2) >=
3362+ SB_JOURNAL_MAX_BATCH(p_s_sb) &&
3363+ ((SB_JOURNAL(p_s_sb)->j_len + nblocks + 2) * 100) <
3364+ (SB_JOURNAL(p_s_sb)->j_len_alloc * 75))
3365+ {
3366+ if (atomic_read(&SB_JOURNAL(p_s_sb)->j_wcount) > 10) {
3367+ sched_count++;
3368+ queue_log_writer(p_s_sb);
3369+ goto relock;
3370+ }
3371+ }
3372+ /* don't mess with joining the transaction if all we have to do is
3373+ * wait for someone else to do a commit
3374+ */
3375+ if (atomic_read(&SB_JOURNAL(p_s_sb)->j_jlock)) {
3376+ while (SB_JOURNAL(p_s_sb)->j_trans_id == old_trans_id &&
3377+ atomic_read(&SB_JOURNAL(p_s_sb)->j_jlock)) {
3378+ queue_log_writer(p_s_sb);
3379+ }
3380+ goto relock;
3381+ }
3382+ journal_join(&myth, p_s_sb, 1) ;
3383+
3384+ /* someone might have ended the transaction while we joined */
3385+ if (old_trans_id != SB_JOURNAL(p_s_sb)->j_trans_id) {
3386+ do_journal_end(&myth, p_s_sb, 1, 0) ;
3387 } else {
3388- /* but if the writer count isn't zero, we have to wait for the current writers to finish.
3389- ** They won't batch on transaction end once we set j_jlock
3390- */
3391- atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ;
3392- old_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ;
3393- while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) &&
3394- SB_JOURNAL(p_s_sb)->j_trans_id == old_trans_id) {
3395- sleep_on(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
3396- }
3397+ do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW) ;
3398 }
3399 PROC_INFO_INC( p_s_sb, journal.journal_relock_wcount );
3400 goto relock ;
3401 }
3402
3403 if (SB_JOURNAL(p_s_sb)->j_trans_start_time == 0) { /* we are the first writer, set trans_id */
3404- SB_JOURNAL(p_s_sb)->j_trans_start_time = now ;
3405+ SB_JOURNAL(p_s_sb)->j_trans_start_time = CURRENT_TIME;
3406 }
3407 atomic_inc(&(SB_JOURNAL(p_s_sb)->j_wcount)) ;
3408 SB_JOURNAL(p_s_sb)->j_len_alloc += nblocks ;
3409 th->t_blocks_logged = 0 ;
3410 th->t_blocks_allocated = nblocks ;
3411- th->t_super = p_s_sb ;
3412 th->t_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ;
3413- th->t_caller = "Unknown" ;
3414+ reiserfs_set_handle_active(th) ;
3415 unlock_journal(p_s_sb) ;
3416- p_s_sb->s_dirt = 1;
3417 return 0 ;
3418 }
3419
3420+struct reiserfs_transaction_handle *
3421+reiserfs_persistent_transaction(struct super_block *s, unsigned long nblocks) {
3422+ int ret ;
3423+ struct reiserfs_transaction_handle *th ;
3424
3425+ /* if we're nesting into an existing transaction, it will be
3426+ ** persistent on its own
3427+ */
3428+ if (reiserfs_transaction_running(s)) {
3429+ th = current->journal_info ;
3430+ th->t_refcount++ ;
3431+ if (th->t_refcount < 2) {
3432+ BUG() ;
3433+ }
3434+ return th ;
3435+ }
3436+ th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS, s) ;
3437+ if (!th) {
3438+ return ERR_PTR(-ENOMEM) ;
3439+ }
3440+ ret = journal_begin(th, s, nblocks) ;
3441+ if (ret) {
3442+ reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), s) ;
3443+ return ERR_PTR(ret) ;
3444+ }
3445+ /* do_journal_end is now responsible for freeing the handle */
3446+ reiserfs_set_handle_persistent(th) ;
3447+ return th ;
3448+}
3449 static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) {
3450+ struct reiserfs_transaction_handle *cur_th = current->journal_info;
3451+
3452+ /* this keeps do_journal_end from NULLing out the current->journal_info
3453+ ** pointer
3454+ */
3455+ th->t_handle_save = cur_th ;
3456+ if (cur_th && cur_th->t_refcount > 1) {
3457+ BUG() ;
3458+ }
3459 return do_journal_begin_r(th, p_s_sb, nblocks, 1) ;
3460 }
3461
3462 int journal_begin(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks) {
3463- return do_journal_begin_r(th, p_s_sb, nblocks, 0) ;
3464+ struct reiserfs_transaction_handle *cur_th = current->journal_info ;
3465+ int ret ;
3466+
3467+ th->t_handle_save = NULL ;
3468+ if (cur_th) {
3469+ /* we are nesting into the current transaction */
3470+ if (cur_th->t_super == p_s_sb) {
3471+ cur_th->t_refcount++ ;
3472+ memcpy(th, cur_th, sizeof(*th));
3473+ th->t_flags = 0 ;
3474+ reiserfs_set_handle_active(th) ;
3475+ if (th->t_refcount <= 1)
3476+ printk("BAD: refcount <= 1, but journal_info != 0\n");
3477+ return 0;
3478+ } else {
3479+ /* we've ended up with a handle from a different filesystem.
3480+ ** save it and restore on journal_end. This should never
3481+ ** really happen...
3482+ */
3483+ reiserfs_warning(p_s_sb, "clm-2100: nesting into a different FS\n") ;
3484+ th->t_handle_save = current->journal_info ;
3485+ current->journal_info = th;
3486+ }
3487+ } else {
3488+ current->journal_info = th;
3489+ }
3490+ ret = do_journal_begin_r(th, p_s_sb, nblocks, 0) ;
3491+ if (current->journal_info != th)
3492+ BUG() ;
3493+ return ret ;
3494 }
3495
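The journal_begin()/journal_end() pair above makes handles nestable through current->journal_info: a second begin against the same super block just bumps t_refcount, and only the outermost end really closes the transaction. A stripped-down model of the refcount protocol; it is single-task, so journal_info is a plain global, and it omits the handle-state memcpy the real code performs:

    #include <assert.h>
    #include <stdio.h>

    struct toy_handle { int t_refcount; };

    /* models current->journal_info for one task */
    static struct toy_handle *journal_info;

    /* journal_begin: reuse the running handle when one exists,
     * otherwise install a fresh one */
    static void toy_begin(struct toy_handle *th)
    {
        if (journal_info) {                /* nested call */
            journal_info->t_refcount++;
            return;
        }
        th->t_refcount = 1;
        journal_info = th;
    }

    /* journal_end: only the outermost end commits */
    static void toy_end(void)
    {
        assert(journal_info);
        if (--journal_info->t_refcount == 0) {
            printf("commit!\n");
            journal_info = NULL;
        }
    }

    int main(void)
    {
        struct toy_handle outer, inner;
        toy_begin(&outer);   /* starts the transaction */
        toy_begin(&inner);   /* nests: refcount 2, no new transaction */
        toy_end();           /* drops to 1, nothing committed yet */
        toy_end();           /* refcount 0: the commit happens here */
        return 0;
    }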
3496 /* not used at all */
3497@@ -2389,7 +2902,7 @@
3498 reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n",
3499 th->t_trans_id, SB_JOURNAL(p_s_sb)->j_trans_id);
3500 }
3501- p_s_sb->s_dirt = 1 ;
3502+ p_s_sb->s_dirt = 1;
3503
3504 prepared = test_and_clear_bit(BH_JPrepared, &bh->b_state) ;
3505 /* already in this transaction, we are done */
3506@@ -2413,6 +2926,7 @@
3507
3508 if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0) {
3509 reiserfs_warning(p_s_sb, "journal-1409: journal_mark_dirty returning because j_wcount was %d\n", atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount))) ;
3510+ BUG();
3511 return 1 ;
3512 }
3513 /* this error means I've screwed up, and we've overflowed the transaction.
3514@@ -2479,25 +2993,36 @@
3515 return 0 ;
3516 }
3517
3518-/*
3519-** if buffer already in current transaction, do a journal_mark_dirty
3520-** otherwise, just mark it dirty and move on. Used for writes to meta blocks
3521-** that don't need journaling
3522-*/
3523-int journal_mark_dirty_nolog(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, struct buffer_head *bh) {
3524- if (reiserfs_dont_log(th->t_super) || buffer_journaled(bh) ||
3525- buffer_journal_dirty(bh)) {
3526- return journal_mark_dirty(th, p_s_sb, bh) ;
3527- }
3528- if (get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_list_hash_table, bh->b_dev,bh->b_blocknr,bh->b_size)) {
3529- return journal_mark_dirty(th, p_s_sb, bh) ;
3530- }
3531- mark_buffer_dirty(bh) ;
3532- return 0 ;
3533-}
3534-
3535 int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) {
3536- return do_journal_end(th, p_s_sb, nblocks, 0) ;
3537+
3538+ int ret;
3539+ if (!current->journal_info && th->t_refcount > 1)
3540+ printk("REISER-NESTING: th NULL, refcount %d\n", th->t_refcount);
3541+ if (th->t_refcount > 1) {
3542+ struct reiserfs_transaction_handle *cur_th = current->journal_info ;
3543+
3544+ /* we aren't allowed to close a nested transaction on a different
3545+ ** filesystem from the one in the task struct
3546+ */
3547+ if (cur_th->t_super != th->t_super)
3548+ BUG() ;
3549+
3550+ th->t_refcount--;
3551+ if (th != cur_th) {
3552+ int flags = cur_th->t_flags ;
3553+ /* nested handles are never persistent */
3554+ if (reiserfs_persistent_handle(th)) {
3555+ BUG() ;
3556+ }
3557+ memcpy(cur_th, th, sizeof(*th));
3558+ th->t_flags = 0 ;
3559+ cur_th->t_flags = flags ;
3560+ }
3561+ ret = 0;
3562+ } else {
3563+ ret = do_journal_end(th, p_s_sb, nblocks, 0) ;
3564+ }
3565+ return ret;
3566 }
3567
3568 /* removes from the current transaction, releasing and decrementing any counters.
3569@@ -2600,6 +3125,10 @@
3570 */
3571 int journal_end_sync(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) {
3572
3573+ /* you are not allowed to sync while nested, very, very bad */
3574+ if (th->t_refcount > 1) {
3575+ BUG() ;
3576+ }
3577 if (SB_JOURNAL(p_s_sb)->j_len == 0) {
3578 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
3579 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
3580@@ -2624,12 +3153,14 @@
3581 **
3582 */
3583 void flush_async_commits(struct super_block *p_s_sb) {
3584- int i ;
3585+ struct reiserfs_journal_list *jl;
3586+ struct list_head *entry;
3587
3588- for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) {
3589- if (i != SB_JOURNAL_LIST_INDEX(p_s_sb)) {
3590- flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1) ;
3591- }
3592+ if (!list_empty(&SB_JOURNAL(p_s_sb)->j_journal_list)) {
3593+ /* last entry is the youngest, commit it and you get everything */
3594+ entry = SB_JOURNAL(p_s_sb)->j_journal_list.prev;
3595+ jl = JOURNAL_LIST_ENTRY(entry);
3596+ flush_commit_list(p_s_sb, jl, 1);
3597 }
3598 }
3599
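The rewrite of flush_async_commits() above leans on the ordering its comment states: commits reach disk oldest-first, so flushing the youngest entry on j_journal_list covers every older one too. The invariant in a few lines, with array indices standing in for list age (nothing here is reiserfs API):

    #include <stdio.h>

    #define NLISTS 4

    static int committed[NLISTS];

    /* flushing list i pushes out every older commit first,
     * so lists 0..i all end up committed */
    static void toy_flush_commit(int i)
    {
        for (int k = 0; k <= i; k++)
            committed[k] = 1;
    }

    int main(void)
    {
        toy_flush_commit(NLISTS - 1);    /* commit only the youngest */
        for (int k = 0; k < NLISTS; k++)
            printf("list %d committed=%d\n", k, committed[k]);
        return 0;
    }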
3600@@ -2637,58 +3168,39 @@
3601 ** flushes any old transactions to disk
3602 ** ends the current transaction if it is too old
3603 **
3604-** also calls flush_journal_list with old_only == 1, which allows me to reclaim
3605-** memory and such from the journal lists whose real blocks are all on disk.
3606-**
3607-** called by sync_dev_journal from buffer.c
3608 */
3609-int flush_old_commits(struct super_block *p_s_sb, int immediate) {
3610- int i ;
3611- int count = 0;
3612- int start ;
3613- time_t now ;
3614- struct reiserfs_transaction_handle th ;
3615-
3616- start = SB_JOURNAL_LIST_INDEX(p_s_sb) ;
3617- now = CURRENT_TIME ;
3618+int reiserfs_flush_old_commits(struct super_block *p_s_sb) {
3619+ time_t now ;
3620+ struct reiserfs_transaction_handle th ;
3621+
3622+ now = CURRENT_TIME ;
3623+ /* safety check so we don't flush while we are replaying the log during
3624+ * mount
3625+ */
3626+ if (list_empty(&SB_JOURNAL(p_s_sb)->j_journal_list)) {
3627+ return 0 ;
3628+ }
3629
3630- /* safety check so we don't flush while we are replaying the log during mount */
3631- if (SB_JOURNAL_LIST_INDEX(p_s_sb) < 0) {
3632- return 0 ;
3633- }
3634- /* starting with oldest, loop until we get to the start */
3635- i = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ;
3636- while(i != start) {
3637- if (SB_JOURNAL_LIST(p_s_sb)[i].j_len > 0 && ((now - SB_JOURNAL_LIST(p_s_sb)[i].j_timestamp) > SB_JOURNAL_MAX_COMMIT_AGE(p_s_sb) ||
3638- immediate)) {
3639- /* we have to check again to be sure the current transaction did not change */
3640- if (i != SB_JOURNAL_LIST_INDEX(p_s_sb)) {
3641- flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1) ;
3642- }
3643- }
3644- i = (i + 1) % JOURNAL_LIST_COUNT ;
3645- count++ ;
3646- }
3647- /* now, check the current transaction. If there are no writers, and it is too old, finish it, and
3648- ** force the commit blocks to disk
3649- */
3650- if (!immediate && atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0 &&
3651- SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 &&
3652- SB_JOURNAL(p_s_sb)->j_len > 0 &&
3653- (now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > SB_JOURNAL_MAX_TRANS_AGE(p_s_sb)) {
3654- journal_join(&th, p_s_sb, 1) ;
3655- reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
3656- journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
3657- do_journal_end(&th, p_s_sb,1, COMMIT_NOW) ;
3658- } else if (immediate) { /* belongs above, but I wanted this to be very explicit as a special case. If they say to
3659- flush, we must be sure old transactions hit the disk too. */
3660- journal_join(&th, p_s_sb, 1) ;
3661- reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
3662- journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
3663- do_journal_end(&th, p_s_sb,1, COMMIT_NOW | WAIT) ;
3664- }
3665- reiserfs_journal_kupdate(p_s_sb) ;
3666- return 0 ;
3667+ /* check the current transaction. If there are no writers, and it is
3668+ * too old, finish it, and force the commit blocks to disk
3669+ */
3670+ if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0 &&
3671+ SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 &&
3672+ SB_JOURNAL(p_s_sb)->j_len > 0 &&
3673+ (now - SB_JOURNAL(p_s_sb)->j_trans_start_time) >
3674+ SB_JOURNAL_MAX_TRANS_AGE(p_s_sb))
3675+ {
3676+ journal_join(&th, p_s_sb, 1) ;
3677+ reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
3678+ journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
3679+
3680+ /* we're only being called from kreiserfsd, so it makes no sense to do
3681+ ** an async commit just so that kreiserfsd can do it later
3682+ */
3683+ do_journal_end(&th, p_s_sb,1, COMMIT_NOW | WAIT) ;
3684+ }
3685+ reiserfs_journal_kupdate(p_s_sb) ;
3686+ return p_s_sb->s_dirt;
3687 }
3688
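reiserfs_flush_old_commits() above ends the running transaction only when it is idle, non-empty, and past the maximum transaction age. That compound test, extracted into a standalone predicate; the field names mirror the journal struct, but this is an illustrative model rather than kernel code:

    #include <stdio.h>
    #include <time.h>

    struct toy_journal {
        int  j_wcount;             /* writers in the running transaction */
        long j_trans_start_time;   /* 0 when no transaction is open */
        unsigned long j_len;       /* blocks logged so far */
    };

    /* mirror of the condition above: finish the transaction only
     * when it is idle, non-empty, and older than max_age seconds */
    static int toy_should_end(const struct toy_journal *j,
                              time_t now, long max_age)
    {
        return j->j_wcount <= 0 &&
               j->j_trans_start_time > 0 &&
               j->j_len > 0 &&
               (now - j->j_trans_start_time) > max_age;
    }

    int main(void)
    {
        time_t now = time(NULL);
        struct toy_journal j = { 0, (long)now - 60, 12 };
        printf("end transaction? %s\n",
               toy_should_end(&j, now, 30) ? "yes" : "no");
        return 0;
    }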
3689 /*
3690@@ -2709,6 +3221,7 @@
3691 int flush = flags & FLUSH_ALL ;
3692 int commit_now = flags & COMMIT_NOW ;
3693 int wait_on_commit = flags & WAIT ;
3694+ struct reiserfs_journal_list *jl;
3695
3696 if (th->t_trans_id != SB_JOURNAL(p_s_sb)->j_trans_id) {
3697 reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n",
3698@@ -2727,8 +3240,9 @@
3699 if (SB_JOURNAL(p_s_sb)->j_len == 0) {
3700 int wcount = atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) ;
3701 unlock_journal(p_s_sb) ;
3702- if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) > 0 && wcount <= 0) {
3703- atomic_dec(&(SB_JOURNAL(p_s_sb)->j_jlock)) ;
3704+ BUG();
3705+ if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) > 0 && wcount <= 0) {
3706+ atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ;
3707 wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
3708 }
3709 return 0 ;
3710@@ -2741,24 +3255,37 @@
3711 */
3712 if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) > 0) {
3713 if (flush || commit_now) {
3714- int orig_jindex = SB_JOURNAL_LIST_INDEX(p_s_sb) ;
3715+ unsigned trans_id ;
3716+
3717+ jl = SB_JOURNAL(p_s_sb)->j_current_jl;
3718+ trans_id = jl->j_trans_id;
3719+
3720 atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ;
3721 if (flush) {
3722 SB_JOURNAL(p_s_sb)->j_next_full_flush = 1 ;
3723 }
3724 unlock_journal(p_s_sb) ;
3725+
3726 /* sleep while the current transaction is still j_jlocked */
3727- while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) &&
3728- SB_JOURNAL(p_s_sb)->j_trans_id == th->t_trans_id) {
3729- sleep_on(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
3730- }
3731- if (commit_now) {
3732- if (wait_on_commit) {
3733- flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ;
3734- } else {
3735- commit_flush_async(p_s_sb, orig_jindex) ;
3736+ while(SB_JOURNAL(p_s_sb)->j_trans_id == trans_id) {
3737+ if (atomic_read(&SB_JOURNAL(p_s_sb)->j_jlock)) {
3738+ queue_log_writer(p_s_sb);
3739+ } else {
3740+ lock_journal(p_s_sb);
3741+ if (SB_JOURNAL(p_s_sb)->j_trans_id == trans_id) {
3742+ atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ;
3743+ }
3744+ unlock_journal(p_s_sb);
3745 }
3746 }
3747+ if (SB_JOURNAL(p_s_sb)->j_trans_id == trans_id) {
3748+ BUG();
3749+ }
3750+ if (commit_now && journal_list_still_alive(p_s_sb, trans_id) &&
3751+ wait_on_commit)
3752+ {
3753+ flush_commit_list(p_s_sb, jl, 1) ;
3754+ }
3755 return 0 ;
3756 }
3757 unlock_journal(p_s_sb) ;
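The wait loop above captures the journal list's trans_id up front and spins until j_trans_id moves past it, either sleeping behind a committer that already holds j_jlock or re-checking under the journal lock and claiming j_jlock itself. A sketch of that state machine; toy_sleep() stands in for queue_log_writer() by letting the fake committer finish:

    #include <stdio.h>

    struct toy_journal { unsigned long j_trans_id; int j_jlock; };

    /* stand-in for queue_log_writer(): sleeping lets the committer
     * run, so the fake journal finishes the transaction */
    static void toy_sleep(struct toy_journal *j)
    {
        j->j_jlock = 0;
        j->j_trans_id++;
    }

    /* wait until transaction trans_id has ended, claiming the
     * commit ourselves when nobody else is doing it */
    static void toy_wait_for_end(struct toy_journal *j, unsigned long trans_id)
    {
        while (j->j_trans_id == trans_id) {
            if (j->j_jlock)
                toy_sleep(j);                    /* someone is committing */
            else if (j->j_trans_id == trans_id)  /* re-check, as under the lock */
                j->j_jlock = 1;                  /* claim the commit ourselves */
        }
    }

    int main(void)
    {
        struct toy_journal j = { 42, 1 };
        toy_wait_for_end(&j, 42);
        printf("transaction 42 ended, now at %lu\n", j.j_trans_id);
        return 0;
    }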
3758@@ -2776,8 +3303,8 @@
3759 if (!(SB_JOURNAL(p_s_sb)->j_must_wait > 0) && !(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock))) && !flush && !commit_now &&
3760 (SB_JOURNAL(p_s_sb)->j_len < SB_JOURNAL_MAX_BATCH(p_s_sb)) &&
3761 SB_JOURNAL(p_s_sb)->j_len_alloc < SB_JOURNAL_MAX_BATCH(p_s_sb) && SB_JOURNAL(p_s_sb)->j_cnode_free > (SB_JOURNAL_TRANS_MAX(p_s_sb) * 3)) {
3762- SB_JOURNAL(p_s_sb)->j_bcount++ ;
3763 unlock_journal(p_s_sb) ;
3764+
3765 return 0 ;
3766 }
3767
3768@@ -2807,16 +3334,13 @@
3769 struct reiserfs_list_bitmap *jb = NULL ;
3770 int cleaned = 0 ;
3771
3772- if (reiserfs_dont_log(th->t_super)) {
3773- bh = sb_get_hash_table(p_s_sb, blocknr) ;
3774- if (bh && buffer_dirty (bh)) {
3775- reiserfs_warning (p_s_sb, "journal_mark_freed(dont_log): dirty buffer on hash list: %lx %ld\n", bh->b_state, blocknr);
3776- BUG ();
3777- }
3778- brelse (bh);
3779- return 0 ;
3780+ cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_hash_table, p_s_sb->s_dev,
3781+ blocknr, p_s_sb->s_blocksize) ;
3782+ if (cn && cn->bh) {
3783+ bh = cn->bh ;
3784+ get_bh(bh) ;
3785 }
3786- bh = sb_get_hash_table(p_s_sb, blocknr) ;
3787+
3788 /* if it is journal new, we just remove it from this transaction */
3789 if (bh && buffer_journal_new(bh)) {
3790 mark_buffer_notjournal_new(bh) ;
3791@@ -2824,14 +3348,22 @@
3792 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ;
3793 } else {
3794 /* set the bit for this block in the journal bitmap for this transaction */
3795- jb = SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap ;
3796+ jb = SB_JOURNAL(p_s_sb)->j_current_jl->j_list_bitmap;
3797 if (!jb) {
3798 reiserfs_panic(p_s_sb, "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n") ;
3799 }
3800- set_bit_in_list_bitmap(p_s_sb, blocknr, jb) ;
3801
3802- /* Note, the entire while loop is not allowed to schedule. */
3803+ /* we set bits in the list bitmap so the block won't be reallocated
3804+ * as a data block which might get flushed before this transaction
3805+ * commits. When data logging is on, the block might get reallocated
3806+ * as a data block, but we know the data block won't get flushed before
3807+ * we commit
3808+ */
3809+ if (!reiserfs_data_log(p_s_sb)) {
3810+ set_bit_in_list_bitmap(p_s_sb, blocknr, jb) ;
3811+ }
3812
3813+ /* Note, the entire while loop is not allowed to schedule. */
3814 if (bh) {
3815 clear_prepared_bits(bh) ;
3816 }
3817@@ -2876,57 +3408,77 @@
3818
3819 void reiserfs_update_inode_transaction(struct inode *inode) {
3820
3821- inode->u.reiserfs_i.i_trans_index = SB_JOURNAL_LIST_INDEX(inode->i_sb);
3822-
3823+ inode->u.reiserfs_i.i_jl = SB_JOURNAL(inode->i_sb)->j_current_jl;
3824 inode->u.reiserfs_i.i_trans_id = SB_JOURNAL(inode->i_sb)->j_trans_id ;
3825 }
3826
3827 void reiserfs_update_tail_transaction(struct inode *inode) {
3828
3829- inode->u.reiserfs_i.i_tail_trans_index = SB_JOURNAL_LIST_INDEX(inode->i_sb);
3830-
3831+ inode->u.reiserfs_i.i_tail_jl = SB_JOURNAL(inode->i_sb)->j_current_jl;
3832 inode->u.reiserfs_i.i_tail_trans_id = SB_JOURNAL(inode->i_sb)->j_trans_id ;
3833 }
3834
3835-static void __commit_trans_index(struct inode *inode, unsigned long id,
3836- unsigned long index)
3837+static void __commit_trans_jl(struct inode *inode, unsigned long id,
3838+ struct reiserfs_journal_list *jl)
3839 {
3840- struct reiserfs_journal_list *jl ;
3841 struct reiserfs_transaction_handle th ;
3842 struct super_block *sb = inode->i_sb ;
3843
3844- jl = SB_JOURNAL_LIST(sb) + index;
3845-
3846 /* is it from the current transaction, or from an unknown transaction? */
3847 if (id == SB_JOURNAL(sb)->j_trans_id) {
3848- journal_join(&th, sb, 1) ;
3849+ jl = SB_JOURNAL(sb)->j_current_jl;
3850+ /* try to let other writers come in and grow this transaction */
3851+ let_transaction_grow(sb, id);
3852+ if (SB_JOURNAL(sb)->j_trans_id != id) {
3853+ goto flush_commit_only;
3854+ }
3855+
3856+ journal_begin(&th, sb, 1) ;
3857+
3858+ /* someone might have ended this transaction while we joined */
3859+ if (SB_JOURNAL(sb)->j_trans_id != id) {
3860+ reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 1) ;
3861+ journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)) ;
3862+ journal_end(&th, sb, 1) ;
3863+ goto flush_commit_only;
3864+ }
3865+
3866 journal_end_sync(&th, sb, 1) ;
3867- } else if (jl->j_trans_id == id) {
3868- flush_commit_list(sb, jl, 1) ;
3869+
3870+ } else {
3871+ /* this gets tricky: we have to make sure the journal list in
3872+ * the inode still exists. We know the list is still around
3873+ * if we've got a larger transaction id than the oldest list
3874+ */
3875+flush_commit_only:
3876+ if (journal_list_still_alive(inode->i_sb, id)) {
3877+ flush_commit_list(sb, jl, 1) ;
3878+ }
3879 }
3880- /* if the transaction id does not match, this list is long since flushed
3881- ** and we don't have to do anything here
3882- */
3883+ /* otherwise the list is gone, and long since committed */
3884 }
3885 void reiserfs_commit_for_tail(struct inode *inode) {
3886 unsigned long id = inode->u.reiserfs_i.i_tail_trans_id;
3887- unsigned long index = inode->u.reiserfs_i.i_tail_trans_index;
3888+ struct reiserfs_journal_list *jl = inode->u.reiserfs_i.i_tail_jl;
3889
3890 /* for tails, if this info is unset there's nothing to commit */
3891- if (id && index)
3892- __commit_trans_index(inode, id, index);
3893+ if (id && jl)
3894+ __commit_trans_jl(inode, id, jl);
3895 }
3896 void reiserfs_commit_for_inode(struct inode *inode) {
3897 unsigned long id = inode->u.reiserfs_i.i_trans_id;
3898- unsigned long index = inode->u.reiserfs_i.i_trans_index;
3899+ struct reiserfs_journal_list *jl = inode->u.reiserfs_i.i_jl;
3900
3901- /* for the whole inode, assume unset id or index means it was
3902+ /* for the whole inode, assume unset id means it was
3903 * changed in the current transaction. More conservative
3904 */
3905- if (!id || !index)
3906+ if (!id || !jl) {
3907 reiserfs_update_inode_transaction(inode) ;
3908+ id = inode->u.reiserfs_i.i_trans_id;
3909+ /* jl will be updated in __commit_trans_jl */
3910+ }
3911
3912- __commit_trans_index(inode, id, index);
3913+ __commit_trans_jl(inode, id, jl);
3914 }
3915
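__commit_trans_jl() above has three outcomes: join and synchronously end the transaction when the inode's id is still the running one, flush the saved list's commit when journal_list_still_alive() says it has not been recycled, or do nothing because the commit long since reached disk. The decision table as a tiny function; per the comment above, a list is treated as alive when its id is at least the oldest live list's id, and every name here is an illustrative stand-in:

    #include <stdio.h>

    enum commit_action { SYNC_CURRENT, FLUSH_OLD_LIST, NOTHING_TO_DO };

    static enum commit_action toy_commit_action(unsigned long id,
                                                unsigned long running_id,
                                                unsigned long oldest_live_id)
    {
        if (id == running_id)
            return SYNC_CURRENT;       /* still the open transaction */
        if (id >= oldest_live_id)      /* journal_list_still_alive() */
            return FLUSH_OLD_LIST;
        return NOTHING_TO_DO;          /* committed and recycled long ago */
    }

    int main(void)
    {
        printf("%d %d %d\n",
               toy_commit_action(9, 9, 5),   /* sync the current one */
               toy_commit_action(7, 9, 5),   /* flush an old list */
               toy_commit_action(2, 9, 5));  /* nothing left to do */
        return 0;
    }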
3916 void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb,
3917@@ -2954,8 +3506,6 @@
3918 int retry_count = 0 ;
3919
3920 PROC_INFO_INC( p_s_sb, journal.prepare );
3921- if (reiserfs_dont_log (p_s_sb))
3922- return;
3923
3924 while(!test_bit(BH_JPrepared, &bh->b_state) ||
3925 (wait && buffer_locked(bh))) {
3926@@ -2964,16 +3514,37 @@
3927 return ;
3928 }
3929 set_bit(BH_JPrepared, &bh->b_state) ;
3930+
3931 if (wait) {
3932 RFALSE( buffer_locked(bh) && cur_tb != NULL,
3933 "waiting while do_balance was running\n") ;
3934+ /* only data buffers are allowed to come in dirty, and they
3935+ * never get run through restore_prepared_buffer. So we can
3936+ * just mark them clean here and know it is safe
3937+ */
3938+ mark_buffer_clean(bh);
3939 wait_on_buffer(bh) ;
3940- }
3941+ }
3942 PROC_INFO_INC( p_s_sb, journal.prepare_retry );
3943 retry_count++ ;
3944 }
3945 }
3946-
3947+static void flush_old_journal_lists(struct super_block *s) {
3948+ struct reiserfs_journal_list *jl;
3949+ struct list_head *entry;
3950+ time_t now = CURRENT_TIME;
3951+
3952+ while(!list_empty(&SB_JOURNAL(s)->j_journal_list)) {
3953+ entry = SB_JOURNAL(s)->j_journal_list.next;
3954+ jl = JOURNAL_LIST_ENTRY(entry);
3955+ /* this check should always be run, to send old lists to disk */
3956+ if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) {
3957+ flush_used_journal_lists(s, jl);
3958+ } else {
3959+ break;
3960+ }
3961+ }
3962+}
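flush_old_journal_lists() above walks j_journal_list oldest-first and flushes every list whose timestamp is older than four times the max transaction age, stopping at the first young one because later entries are younger still. The same walk over a plain array of timestamps (the 4x cutoff mirrors JOURNAL_MAX_TRANS_AGE * 4; toy code, not the reiserfs API):

    #include <stdio.h>

    #define NLISTS 5

    /* oldest first, as on j_journal_list */
    static long timestamps[NLISTS] = { 100, 200, 900, 950, 1000 };

    /* flush everything older than the cutoff; the first young list
     * ends the walk, since everything after it is younger */
    static void toy_flush_old_lists(long now, long max_age)
    {
        for (int i = 0; i < NLISTS; i++) {
            if (timestamps[i] < now - 4 * max_age)
                printf("flush list %d (age %ld)\n", i, now - timestamps[i]);
            else
                break;
        }
    }

    int main(void)
    {
        toy_flush_old_lists(1000, 100);   /* flushes lists 0 and 1 only */
        return 0;
    }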
3963 /*
3964 ** long and ugly. If flush, will not return until all commit
3965 ** blocks and all real buffers in the trans are on disk.
3966@@ -2990,18 +3561,30 @@
3967 struct buffer_head *c_bh ; /* commit bh */
3968 struct buffer_head *d_bh ; /* desc bh */
3969 int cur_write_start = 0 ; /* start index of current log write */
3970- int cur_blocks_left = 0 ; /* number of journal blocks left to write */
3971 int old_start ;
3972 int i ;
3973- int jindex ;
3974- int orig_jindex ;
3975 int flush = flags & FLUSH_ALL ;
3976 int commit_now = flags & COMMIT_NOW ;
3977 int wait_on_commit = flags & WAIT ;
3978 struct reiserfs_super_block *rs ;
3979+ struct reiserfs_journal_list *jl, *temp_jl;
3980+ struct list_head *entry, *safe;
3981+ int wakeup_kreiserfsd = 0;
3982+ unsigned long jindex;
3983+ unsigned long commit_trans_id;
3984+
3985+ if (th->t_refcount > 1)
3986+ BUG() ;
3987
3988+ reiserfs_check_lock_depth("journal end");
3989+ current->journal_info = th->t_handle_save;
3990 if (reiserfs_dont_log(th->t_super)) {
3991- return 0 ;
3992+ goto out ;
3993+ }
3994+
3995+ if (SB_JOURNAL(p_s_sb)->j_len == 0) {
3996+ reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
3997+ journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
3998 }
3999
4000 lock_journal(p_s_sb) ;
4001@@ -3018,7 +3601,9 @@
4002 ** it tells us if we should continue with the journal_end, or just return
4003 */
4004 if (!check_journal_end(th, p_s_sb, nblocks, flags)) {
4005- return 0 ;
4006+ p_s_sb->s_dirt = 1;
4007+ wake_queued_writers(p_s_sb);
4008+ goto out ;
4009 }
4010
4011 /* check_journal_end might set these, check again */
4012@@ -3037,8 +3622,11 @@
4013 }
4014
4015 #ifdef REISERFS_PREALLOCATE
4016+ /* quota ops might need to nest, setup the journal_info pointer for them */
4017+ current->journal_info = th ;
4018 reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into
4019 * the transaction */
4020+ current->journal_info = th->t_handle_save ;
4021 #endif
4022
4023 rs = SB_DISK_SUPER_BLOCK(p_s_sb) ;
4024@@ -3059,25 +3647,23 @@
4025 mark_buffer_uptodate(c_bh, 1) ;
4026
4027 /* init this journal list */
4028- atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_older_commits_done), 0) ;
4029- SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ;
4030- SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_timestamp = SB_JOURNAL(p_s_sb)->j_trans_start_time ;
4031- SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_bh = c_bh ;
4032- SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_start = SB_JOURNAL(p_s_sb)->j_start ;
4033- SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_len = SB_JOURNAL(p_s_sb)->j_len ;
4034- atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_nonzerolen), SB_JOURNAL(p_s_sb)->j_len) ;
4035- atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_left), SB_JOURNAL(p_s_sb)->j_len + 2);
4036- SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_realblock = NULL ;
4037- atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 1) ;
4038- atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 1) ;
4039-
4040- /* which is faster, locking/unlocking at the start and end of the for
4041- ** or locking once per iteration around the insert_journal_hash?
4042- ** eitherway, we are write locking insert_journal_hash. The ENTIRE FOR
4043- ** LOOP MUST not cause schedule to occur.
4044- */
4045+ jl = SB_JOURNAL(p_s_sb)->j_current_jl;
4046+
4047+ /* save the transaction id in case we need to commit it later */
4048+ commit_trans_id = jl->j_trans_id;
4049
4050- /* for each real block, add it to the journal list hash,
4051+ atomic_set(&jl->j_older_commits_done, 0) ;
4052+ jl->j_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ;
4053+ jl->j_timestamp = SB_JOURNAL(p_s_sb)->j_trans_start_time ;
4054+ jl->j_commit_bh = c_bh ;
4055+ jl->j_start = SB_JOURNAL(p_s_sb)->j_start ;
4056+ jl->j_len = SB_JOURNAL(p_s_sb)->j_len ;
4057+ atomic_set(&jl->j_nonzerolen, SB_JOURNAL(p_s_sb)->j_len) ;
4058+ atomic_set(&jl->j_commit_left, SB_JOURNAL(p_s_sb)->j_len + 2);
4059+ jl->j_realblock = NULL ;
4060+
4061+ /* The ENTIRE FOR LOOP MUST not cause schedule to occur.
4062+ ** for each real block, add it to the journal list hash,
4063 ** copy into real block index array in the commit or desc block
4064 */
4065 for (i = 0, cn = SB_JOURNAL(p_s_sb)->j_first ; cn ; cn = cn->next, i++) {
4066@@ -3087,7 +3673,7 @@
4067 reiserfs_panic(p_s_sb, "journal-1676, get_cnode returned NULL\n") ;
4068 }
4069 if (i == 0) {
4070- SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_realblock = jl_cn ;
4071+ jl->j_realblock = jl_cn ;
4072 }
4073 jl_cn->prev = last_cn ;
4074 jl_cn->next = NULL ;
4075@@ -3105,7 +3691,7 @@
4076 jl_cn->state = 0 ;
4077 jl_cn->dev = cn->bh->b_dev ;
4078 jl_cn->bh = cn->bh ;
4079- jl_cn->jlist = SB_JOURNAL_LIST(p_s_sb) + SB_JOURNAL_LIST_INDEX(p_s_sb) ;
4080+ jl_cn->jlist = jl;
4081 insert_journal_hash(SB_JOURNAL(p_s_sb)->j_list_hash_table, jl_cn) ;
4082 if (i < JOURNAL_TRANS_HALF) {
4083 desc->j_realblock[i] = cpu_to_le32(cn->bh->b_blocknr) ;
4084@@ -3130,29 +3716,34 @@
4085 reiserfs_warning(p_s_sb, "journal-2020: do_journal_end: BAD desc->j_len is ZERO\n") ;
4086 atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ;
4087 wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
4088- return 0 ;
4089+ goto out ;
4090 }
4091
4092 /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
4093 cur_write_start = SB_JOURNAL(p_s_sb)->j_start ;
4094- cur_blocks_left = SB_JOURNAL(p_s_sb)->j_len ;
4095 cn = SB_JOURNAL(p_s_sb)->j_first ;
4096 jindex = 1 ; /* start at one so we don't get the desc again */
4097- while(cur_blocks_left > 0) {
4098+ while(cn) {
4099+ clear_bit(BH_JNew, &(cn->bh->b_state)) ;
4100 /* copy all the real blocks into log area. dirty log blocks */
4101 if (test_bit(BH_JDirty, &cn->bh->b_state)) {
4102 struct buffer_head *tmp_bh ;
4103 tmp_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
4104 ((cur_write_start + jindex) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ;
4105 mark_buffer_uptodate(tmp_bh, 1) ;
4106- memcpy(tmp_bh->b_data, cn->bh->b_data, cn->bh->b_size) ;
4107+ memcpy(tmp_bh->b_data, bh_kmap(cn->bh), cn->bh->b_size) ;
4108+ bh_kunmap(cn->bh);
4109 jindex++ ;
4110+ set_bit(BH_JDirty_wait, &(cn->bh->b_state)) ;
4111+ clear_bit(BH_JDirty, &(cn->bh->b_state)) ;
4112 } else {
4113 /* JDirty cleared sometime during transaction. don't log this one */
4114 reiserfs_warning(p_s_sb, "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!\n") ;
4115+ brelse(cn->bh) ;
4116 }
4117- cn = cn->next ;
4118- cur_blocks_left-- ;
4119+ next = cn->next ;
4120+ free_cnode(p_s_sb, cn) ;
4121+ cn = next ;
4122 }
4123
4124 /* we are done with both the c_bh and d_bh, but
4125@@ -3160,47 +3751,19 @@
4126 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
4127 */
4128
4129- /* now loop through and mark all buffers from this transaction as JDirty_wait
4130- ** clear the JDirty bit, clear BH_JNew too.
4131- ** if they weren't JDirty, they weren't logged, just relse them and move on
4132- */
4133- cn = SB_JOURNAL(p_s_sb)->j_first ;
4134- while(cn) {
4135- clear_bit(BH_JNew, &(cn->bh->b_state)) ;
4136- if (test_bit(BH_JDirty, &(cn->bh->b_state))) {
4137- set_bit(BH_JDirty_wait, &(cn->bh->b_state)) ;
4138- clear_bit(BH_JDirty, &(cn->bh->b_state)) ;
4139- } else {
4140- brelse(cn->bh) ;
4141- }
4142- next = cn->next ;
4143- free_cnode(p_s_sb, cn) ;
4144- cn = next ;
4145- }
4146-
4147- /* unlock the journal list for committing and flushing */
4148- atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 0) ;
4149- atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 0) ;
4150-
4151- orig_jindex = SB_JOURNAL_LIST_INDEX(p_s_sb) ;
4152- jindex = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ;
4153- SB_JOURNAL_LIST_INDEX(p_s_sb) = jindex ;
4154+ SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb);
4155
4156- /* write any buffers that must hit disk before this commit is done */
4157- fsync_buffers_list(&(SB_JOURNAL(p_s_sb)->j_dirty_buffers)) ;
4158+ /* we lock the commit before putting it onto the main list because
4159+ * we want to make sure nobody tries to run flush_commit_list until
4160+ * the new transaction is fully set up, and we've already flushed the
4161+ * ordered bh list
4162+ */
4163+ down(&jl->j_commit_lock);
4164
4165- /* honor the flush and async wishes from the caller */
4166- if (flush) {
4167-
4168- flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ;
4169- flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex , 1) ;
4170- } else if (commit_now) {
4171- if (wait_on_commit) {
4172- flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ;
4173- } else {
4174- commit_flush_async(p_s_sb, orig_jindex) ;
4175- }
4176- }
4177+ /* now it is safe to insert this transaction on the main list */
4178+ list_add_tail(&jl->j_list, &SB_JOURNAL(p_s_sb)->j_journal_list);
4179+ list_add_tail(&jl->j_working_list, &SB_JOURNAL(p_s_sb)->j_working_list);
4180+ SB_JOURNAL(p_s_sb)->j_num_work_lists++;
4181
4182 /* reset journal values for the next transaction */
4183 old_start = SB_JOURNAL(p_s_sb)->j_start ;
4184@@ -3212,57 +3775,119 @@
4185 SB_JOURNAL(p_s_sb)->j_len = 0 ;
4186 SB_JOURNAL(p_s_sb)->j_trans_start_time = 0 ;
4187 SB_JOURNAL(p_s_sb)->j_trans_id++ ;
4188+ SB_JOURNAL(p_s_sb)->j_current_jl->j_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id;
4189 SB_JOURNAL(p_s_sb)->j_must_wait = 0 ;
4190 SB_JOURNAL(p_s_sb)->j_len_alloc = 0 ;
4191 SB_JOURNAL(p_s_sb)->j_next_full_flush = 0 ;
4192 SB_JOURNAL(p_s_sb)->j_next_async_flush = 0 ;
4193 init_journal_hash(p_s_sb) ;
4194
4195+ /* tail conversion targets have to hit the disk before we end the
4196+ * transaction. Otherwise a later transaction might repack the tail
4197+ * before this transaction commits, leaving the data block unflushed and
4198+ * clean; if we crash before the later transaction commits, the data block
4199+ * is lost.
4200+ */
4201+ while(!list_empty(&jl->j_tail_bh_list)) {
4202+ unlock_kernel();
4203+ fsync_buffers_list(&jl->j_tail_bh_list);
4204+ lock_kernel();
4205+ }
4206+ up(&jl->j_commit_lock);
4207+
4208+ /* honor the flush wishes from the caller; simple commits can
4209+ ** be done outside the journal lock, and they are done below
4210+ */
4211+ if (flush) {
4212+ flush_commit_list(p_s_sb, jl, 1) ;
4213+ flush_journal_list(p_s_sb, jl, 1) ;
4214+ }
4215+
4216+
4217 /* if the next transaction has any chance of wrapping, flush
4218 ** transactions that might get overwritten. If any journal lists are very
4219 ** old, flush them as well.
4220 */
4221- for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) {
4222- jindex = i ;
4223- if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && SB_JOURNAL(p_s_sb)->j_start <= SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) {
4224- if ((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) >= SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) {
4225- flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1) ;
4226- }
4227- } else if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 &&
4228- (SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
4229- if (((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >=
4230- SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) {
4231- flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1 ) ;
4232+first_jl:
4233+ list_for_each_safe(entry, safe, &SB_JOURNAL(p_s_sb)->j_journal_list) {
4234+ temp_jl = JOURNAL_LIST_ENTRY(entry);
4235+ if (SB_JOURNAL(p_s_sb)->j_start <= temp_jl->j_start) {
4236+ if ((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) >=
4237+ temp_jl->j_start)
4238+ {
4239+ flush_used_journal_lists(p_s_sb, temp_jl);
4240+ wakeup_kreiserfsd = 1;
4241+ goto first_jl;
4242+ } else if ((SB_JOURNAL(p_s_sb)->j_start +
4243+ SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) <
4244+ SB_ONDISK_JOURNAL_SIZE(p_s_sb))
4245+ {
4246+ /* if we don't cross into the next transaction and we don't
4247+ * wrap, there is no way we can overlap any later transactions;
4248+ * break now
4249+ */
4250+ break;
4251+ }
4252+ } else if ((SB_JOURNAL(p_s_sb)->j_start +
4253+ SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) >
4254+ SB_ONDISK_JOURNAL_SIZE(p_s_sb))
4255+ {
4256+ if (((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL_TRANS_MAX(p_s_sb) + 1) %
4257+ SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= temp_jl->j_start)
4258+ {
4259+ flush_used_journal_lists(p_s_sb, temp_jl);
4260+ wakeup_kreiserfsd = 1;
4261+ goto first_jl;
4262+ } else {
4263+ /* we don't overlap anything from our start to the end of the
4264+ * log, and our wrapped portion doesn't overlap anything at
4265+ * the start of the log. We can break
4266+ */
4267+ break;
4268 }
4269- }
4270- /* this check should always be run, to send old lists to disk */
4271- if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 &&
4272- SB_JOURNAL_LIST(p_s_sb)[jindex].j_timestamp <
4273- (CURRENT_TIME - (SB_JOURNAL_MAX_TRANS_AGE(p_s_sb) * 4))) {
4274- flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1 ) ;
4275 }
4276 }
4277+ flush_old_journal_lists(p_s_sb);
4278
4279- /* if the next journal_list is still in use, flush it */
4280- if (SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_len != 0) {
4281- flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + SB_JOURNAL_LIST_INDEX(p_s_sb), 1) ;
4282- }
4283+ /* soft limit */
4284+ if (SB_JOURNAL(p_s_sb)->j_num_work_lists > 128 || wakeup_kreiserfsd) {
4285+ wake_up(&reiserfs_commit_thread_wait) ;
4286+ }
4287
4288- /* we don't want anyone flushing the new transaction's list */
4289- atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 1) ;
4290- atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 1) ;
4291- SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL_LIST(p_s_sb) +
4292- SB_JOURNAL_LIST_INDEX(p_s_sb)) ;
4293+ SB_JOURNAL(p_s_sb)->j_current_jl->j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL(p_s_sb)->j_current_jl) ;
4294
4295- if (!(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap)) {
4296+ if (!(SB_JOURNAL(p_s_sb)->j_current_jl->j_list_bitmap)) {
4297 reiserfs_panic(p_s_sb, "journal-1996: do_journal_end, could not get a list bitmap\n") ;
4298 }
4299- unlock_journal(p_s_sb) ;
4300+
4301 atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ;
4302+ unlock_journal(p_s_sb) ;
4303 /* wake up any body waiting to join. */
4304+ clear_bit(WRITERS_QUEUED, &SB_JOURNAL(p_s_sb)->j_state);
4305 wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
4306+
4307+ if (!flush && commit_now && wait_on_commit) {
4308+ if (current->need_resched) {
4309+ schedule() ;
4310+ }
4311+ if (journal_list_still_alive(p_s_sb, commit_trans_id))
4312+ flush_commit_list(p_s_sb, jl, 1) ;
4313+ }
4314+ /* if we did an async commit, get kreiserfsd going on it */
4315+ if (!commit_now && !wait_on_commit) {
4316+ wake_up(&reiserfs_commit_thread_wait) ;
4317+ schedule();
4318+ }
4319+out:
4320+ reiserfs_check_lock_depth("journal end2");
4321+ if (reiserfs_persistent_handle(th)) {
4322+ memset(th, 0, sizeof(*th));
4323+ reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), p_s_sb) ;
4324+ } else
4325+ th->t_flags = 0 ;
4326 return 0 ;
4327 }
4328
4329-
4330-
4331+int __init reiserfs_journal_cache_init(void) {
4332+ return 0;
4333+}
4334diff -urN linux-2.4.22.org/fs/reiserfs/Makefile linux-2.4.22/fs/reiserfs/Makefile
4335--- linux-2.4.22.org/fs/reiserfs/Makefile 2003-11-21 15:08:29.000000000 +0100
4336+++ linux-2.4.22/fs/reiserfs/Makefile 2003-11-21 15:14:23.000000000 +0100
4337@@ -7,6 +7,7 @@
4338 #
4339 # Note 2! The CFLAGS definitions are now in the main makefile...
4340
4341+export-objs := super.o
4342 O_TARGET := reiserfs.o
4343 obj-y := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o super.o prints.o objectid.o \
4344 lbalance.o ibalance.o stree.o hashes.o buffer2.o tail_conversion.o journal.o resize.o item_ops.o ioctl.o procfs.o
4345diff -urN linux-2.4.22.org/fs/reiserfs/namei.c linux-2.4.22/fs/reiserfs/namei.c
4346--- linux-2.4.22.org/fs/reiserfs/namei.c 2003-11-21 15:08:29.000000000 +0100
4347+++ linux-2.4.22/fs/reiserfs/namei.c 2003-11-21 15:14:23.000000000 +0100
4348@@ -7,6 +7,7 @@
4349 #include <linux/bitops.h>
4350 #include <linux/reiserfs_fs.h>
4351 #include <linux/smp_lock.h>
4352+#include <linux/quotaops.h>
4353
4354 #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; }
4355 #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) i->i_nlink--;
4356@@ -469,7 +470,7 @@
4357 }
4358
4359 /* perform the insertion of the entry that we have prepared */
4360- retval = reiserfs_paste_into_item (th, &path, &entry_key, buffer, paste_size);
4361+ retval = reiserfs_paste_into_item (th, &path, &entry_key, dir, buffer, paste_size);
4362 if (buffer != small_buf)
4363 reiserfs_kfree (buffer, buflen, dir->i_sb);
4364 if (retval) {
4365@@ -478,7 +479,6 @@
4366 }
4367
4368 dir->i_size += paste_size;
4369- dir->i_blocks = ((dir->i_size + 511) >> 9);
4370 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
4371 if (!S_ISDIR (inode->i_mode) && visible)
4372 // reiserfs_mkdir or reiserfs_rename will do that by itself
4373@@ -494,7 +494,9 @@
4374 ** inserted into the tree yet.
4375 */
4376 static int drop_new_inode(struct inode *inode) {
4377+ DQUOT_DROP(inode);
4378 make_bad_inode(inode) ;
4379+ inode->i_flags |= S_NOQUOTA;
4380 iput(inode) ;
4381 return 0 ;
4382 }
4383@@ -518,6 +520,11 @@
4384 } else
4385 inode->i_gid = current->fsgid;
4386
4387+ DQUOT_INIT(inode);
4388+ if (DQUOT_ALLOC_INODE(inode)) {
4389+ drop_new_inode(inode);
4390+ return -EDQUOT;
4391+ }
4392 return 0 ;
4393 }
4394
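The hunk above charges a newly created inode to its owner's quota as soon as the uid/gid are settled: DQUOT_INIT() attaches the dquots, DQUOT_ALLOC_INODE() does the charge, and on failure drop_new_inode() releases the quota reference (via DQUOT_DROP) before the bad inode is put. The shape of that error path, modeled with a toy one-user inode counter; the limit and names are made up for the example:

    #include <stdio.h>

    /* toy single-user quota: two inodes allowed */
    static int inodes_used;
    static const int inodes_limit = 2;

    /* stand-in for DQUOT_ALLOC_INODE(): charge one inode or refuse */
    static int toy_alloc_inode(void)
    {
        if (inodes_used >= inodes_limit)
            return -1;
        inodes_used++;
        return 0;
    }

    /* charge right after ownership is known; fail the create
     * (-EDQUOT in the patch) when the charge is refused */
    static int toy_new_inode_init(void)
    {
        if (toy_alloc_inode())
            return -1;    /* the drop_new_inode() path */
        return 0;
    }

    int main(void)
    {
        for (int i = 0; i < 3; i++)
            printf("create #%d -> %d\n", i, toy_new_inode_init());
        return 0;
    }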
4395@@ -536,7 +543,6 @@
4396 return retval ;
4397
4398 journal_begin(&th, dir->i_sb, jbegin_count) ;
4399- th.t_caller = "create" ;
4400 retval = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode);
4401 if (retval) {
4402 goto out_failed ;
4403@@ -750,7 +756,6 @@
4404
4405 DEC_DIR_INODE_NLINK(dir)
4406 dir->i_size -= (DEH_SIZE + de.de_entrylen);
4407- dir->i_blocks = ((dir->i_size + 511) >> 9);
4408 reiserfs_update_sd (&th, dir);
4409
4410 /* prevent empty directory from getting lost */
4411@@ -835,7 +840,6 @@
4412 reiserfs_update_sd (&th, inode);
4413
4414 dir->i_size -= (de.de_entrylen + DEH_SIZE);
4415- dir->i_blocks = ((dir->i_size + 511) >> 9);
4416 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
4417 reiserfs_update_sd (&th, dir);
4418
4419@@ -1245,7 +1249,6 @@
4420 reiserfs_warning ((&th)->t_super, "vs-7060: reiserfs_rename: could not cut old name. Fsck later?\n");
4421
4422 old_dir->i_size -= DEH_SIZE + old_de.de_entrylen;
4423- old_dir->i_blocks = ((old_dir->i_size + 511) >> 9);
4424
4425 reiserfs_update_sd (&th, old_dir);
4426 reiserfs_update_sd (&th, new_dir);
4427diff -urN linux-2.4.22.org/fs/reiserfs/objectid.c linux-2.4.22/fs/reiserfs/objectid.c
4428--- linux-2.4.22.org/fs/reiserfs/objectid.c 2003-11-21 15:08:29.000000000 +0100
4429+++ linux-2.4.22/fs/reiserfs/objectid.c 2003-11-21 15:14:23.000000000 +0100
4430@@ -87,7 +87,6 @@
4431 }
4432
4433 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s));
4434- s->s_dirt = 1;
4435 return unused_objectid;
4436 }
4437
4438@@ -106,8 +105,6 @@
4439
4440 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ;
4441 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s));
4442- s->s_dirt = 1;
4443-
4444
4445 /* start at the beginning of the objectid map (i = 0) and go to
4446 the end of it (i = disk_sb->s_oid_cursize). Linear search is
4447diff -urN linux-2.4.22.org/fs/reiserfs/procfs.c linux-2.4.22/fs/reiserfs/procfs.c
4448--- linux-2.4.22.org/fs/reiserfs/procfs.c 2003-11-21 15:08:29.000000000 +0100
4449+++ linux-2.4.22/fs/reiserfs/procfs.c 2003-11-21 15:14:24.000000000 +0100
4450@@ -497,7 +497,6 @@
4451 "j_first_unflushed_offset: \t%lu\n"
4452 "j_last_flush_trans_id: \t%lu\n"
4453 "j_trans_start_time: \t%li\n"
4454- "j_journal_list_index: \t%i\n"
4455 "j_list_bitmap_index: \t%i\n"
4456 "j_must_wait: \t%i\n"
4457 "j_next_full_flush: \t%i\n"
4458@@ -543,7 +542,6 @@
4459 JF( j_first_unflushed_offset ),
4460 JF( j_last_flush_trans_id ),
4461 JF( j_trans_start_time ),
4462- JF( j_journal_list_index ),
4463 JF( j_list_bitmap_index ),
4464 JF( j_must_wait ),
4465 JF( j_next_full_flush ),
052932c9
AM
4466diff -urN linux-2.4.22.org/fs/reiserfs/stree.c linux-2.4.22/fs/reiserfs/stree.c
4467--- linux-2.4.22.org/fs/reiserfs/stree.c 2003-11-21 15:08:29.000000000 +0100
4468+++ linux-2.4.22/fs/reiserfs/stree.c 2003-11-21 15:14:25.000000000 +0100
4469@@ -60,6 +60,7 @@
4470 #include <linux/pagemap.h>
4471 #include <linux/reiserfs_fs.h>
4472 #include <linux/smp_lock.h>
4473+#include <linux/quotaops.h>
4474
4475 /* Does the buffer contain a disk block which is in the tree. */
4476 inline int B_IS_IN_TREE (const struct buffer_head * p_s_bh)
4477@@ -71,9 +72,6 @@
4478 return ( B_LEVEL (p_s_bh) != FREE_LEVEL );
4479 }
4480
4481-
4482-
4483-
4484 inline void copy_short_key (void * to, const void * from)
4485 {
4486 memcpy (to, from, SHORT_KEY_SIZE);
4487@@ -652,9 +650,9 @@
4488 stop at leaf level - set to
4489 DISK_LEAF_NODE_LEVEL */
4490 ) {
4491- int n_block_number = SB_ROOT_BLOCK (p_s_sb),
4492- expected_level = SB_TREE_HEIGHT (p_s_sb),
4493- n_block_size = p_s_sb->s_blocksize;
4494+ int n_block_number,
4495+ expected_level,
4496+ n_block_size = p_s_sb->s_blocksize;
4497 struct buffer_head * p_s_bh;
4498 struct path_element * p_s_last_element;
4499 int n_node_level, n_retval;
4500@@ -678,8 +676,11 @@
4501 /* With each iteration of this loop we search through the items in the
4502 current node, and calculate the next current node(next path element)
4503 for the next iteration of this loop.. */
4504+ n_block_number = SB_ROOT_BLOCK (p_s_sb);
4505+ expected_level = SB_TREE_HEIGHT (p_s_sb);
4506 while ( 1 ) {
4507
4508+ reiserfs_check_lock_depth("search_by_key");
4509 #ifdef CONFIG_REISERFS_CHECK
4510 if ( !(++n_repeat_counter % 50000) )
4511 reiserfs_warning (p_s_sb, "PAP-5100: search_by_key: %s:"
4512@@ -1123,8 +1124,7 @@
4513 tmp = get_block_num(p_n_unfm_pointer,0);
4514 put_block_num(p_n_unfm_pointer, 0, 0);
4515 journal_mark_dirty (th, p_s_sb, p_s_bh);
4516- inode->i_blocks -= p_s_sb->s_blocksize / 512;
4517- reiserfs_free_block(th, tmp);
4518+ reiserfs_free_block(th, inode, tmp, 1);
4519 /* In case of big fragmentation it is possible that each block
4520 freed will cause dirtying of one more bitmap and then we will
4521 quickly overflow our transaction space. This is a
4522@@ -1132,9 +1132,7 @@
4523 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
4524 int orig_len_alloc = th->t_blocks_allocated ;
4525 pathrelse(p_s_path) ;
4526-
4527- journal_end(th, p_s_sb, orig_len_alloc) ;
4528- journal_begin(th, p_s_sb, orig_len_alloc) ;
4529+ reiserfs_restart_transaction(th, orig_len_alloc);
4530 reiserfs_update_inode_transaction(inode) ;
4531 need_research = 1;
4532 break;
4533@@ -1168,8 +1166,7 @@
4534 }
4535 }
4536
4537-
4538-/* Calculate bytes number which will be deleted or cutted in the balance. */
4539+/* Calculate number of bytes which will be deleted or cut during balance */
4540 int calc_deleted_bytes_number(
4541 struct tree_balance * p_s_tb,
4542 char c_mode
4543@@ -1180,14 +1177,14 @@
4544 if ( is_statdata_le_ih (p_le_ih) )
4545 return 0;
4546
4547+ n_del_size = ( c_mode == M_DELETE ) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0];
4548 if ( is_direntry_le_ih (p_le_ih) ) {
4549 // return EMPTY_DIR_SIZE; /* We delete empty directories only. */
4550 // we can't use EMPTY_DIR_SIZE, as old format dirs have a different
4551 // empty size. ick. FIXME, is this right?
4552 //
4553- return ih_item_len(p_le_ih);
4554+ return n_del_size ;
4555 }
4556- n_del_size = ( c_mode == M_DELETE ) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0];
4557
4558 if ( is_indirect_le_ih (p_le_ih) )
4559 n_del_size = (n_del_size/UNFM_P_SIZE)*
4560@@ -1221,17 +1218,46 @@
4561 item [--i] = 0;
4562 }
4563
4564+#ifdef REISERQUOTA_DEBUG
4565+char key2type(struct key *ih)
4566+{
4567+ if (is_direntry_le_key(2, ih))
4568+ return 'd';
4569+ if (is_direct_le_key(2, ih))
4570+ return 'D';
4571+ if (is_indirect_le_key(2, ih))
4572+ return 'i';
4573+ if (is_statdata_le_key(2, ih))
4574+ return 's';
4575+ return 'u';
4576+}
4577+
4578+char head2type(struct item_head *ih)
4579+{
4580+ if (is_direntry_le_ih(ih))
4581+ return 'd';
4582+ if (is_direct_le_ih(ih))
4583+ return 'D';
4584+ if (is_indirect_le_ih(ih))
4585+ return 'i';
4586+ if (is_statdata_le_ih(ih))
4587+ return 's';
4588+ return 'u';
4589+}
4590+#endif
4591
4592 /* Delete object item. */
4593 int reiserfs_delete_item (struct reiserfs_transaction_handle *th,
4594 struct path * p_s_path, /* Path to the deleted item. */
4595 const struct cpu_key * p_s_item_key, /* Key to search for the deleted item. */
4596- struct inode * p_s_inode,/* inode is here just to update i_blocks */
4597+ struct inode * p_s_inode,/* inode is here just to update i_blocks and quotas */
4598 struct buffer_head * p_s_un_bh) /* NULL or unformatted node pointer. */
4599 {
4600 struct super_block * p_s_sb = p_s_inode->i_sb;
4601 struct tree_balance s_del_balance;
4602 struct item_head s_ih;
4603+ struct item_head *q_ih;
4604+ int quota_cut_bytes;
4605 int n_ret_value,
4606 n_del_size,
4607 n_removed;
4608@@ -1281,6 +1307,22 @@
4609
4610 // reiserfs_delete_item returns item length when success
4611 n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
4612+ q_ih = get_ih(p_s_path) ;
4613+ quota_cut_bytes = ih_item_len(q_ih) ;
4614+
4615+ /* hack so the quota code doesn't have to guess if the file
4616+ ** has a tail. On tail insert, we allocate quota for 1 unformatted node.
4617+ ** We test the offset because the tail might have been
4618+ ** split into multiple items, and we only want to decrement for
4619+ ** the unfm node once
4620+ */
4621+ if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(q_ih)) {
4622+ if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) {
4623+ quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
4624+ } else {
4625+ quota_cut_bytes = 0 ;
4626+ }
4627+ }
4628
4629 if ( p_s_un_bh ) {
4630 int off;
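The quota hack above keeps tail accounting simple: when a direct (tail) item is deleted, only the piece whose key offset lands on byte 1 of a block gives back a whole unformatted node's worth of quota (blocksize plus one pointer), and other pieces of a split tail give back nothing, so the node is decremented exactly once. The offset test in isolation, assuming a 4-byte on-disk block pointer for UNFM_P_SIZE:

    #include <stdio.h>

    #define UNFM_P_SIZE 4   /* assumed size of one unformatted-node pointer */

    /* quota returned for one deleted tail piece */
    static unsigned toy_tail_cut_bytes(unsigned long offset, unsigned blocksize)
    {
        if ((offset & (blocksize - 1)) == 1)
            return blocksize + UNFM_P_SIZE;   /* first piece of the tail */
        return 0;                             /* later split-off pieces */
    }

    int main(void)
    {
        printf("%u %u\n",
               toy_tail_cut_bytes(4097, 4096),    /* 4100 */
               toy_tail_cut_bytes(4200, 4096));   /* 0 */
        return 0;
    }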
4631@@ -1312,10 +1354,14 @@
4632 memcpy(data + off,
4633 B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), n_ret_value);
4634 }
4635-
4636 /* Perform balancing after all resources have been collected at once. */
4637 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
4638
4639+#ifdef REISERQUOTA_DEBUG
4640+ printk(KERN_DEBUG "reiserquota delete_item(): freeing %u, id=%u type=%c\n", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
4641+#endif
4642+ DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
4643+
4644 /* Return deleted body length */
4645 return n_ret_value;
4646 }
4647@@ -1340,14 +1386,16 @@
4648
4649 /* this deletes item which never gets split */
4650 void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th,
4651+ struct inode *inode,
4652 struct key * key)
4653 {
4654 struct tree_balance tb;
4655 INITIALIZE_PATH (path);
4656- int item_len;
4657+ int item_len = 0;
4658 int tb_init = 0 ;
4659 struct cpu_key cpu_key;
4660 int retval;
4661+ int quota_cut_bytes = 0;
4662
4663 le_key2cpu_key (&cpu_key, key);
4664
4665@@ -1371,6 +1419,7 @@
4666 item_len = ih_item_len( PATH_PITEM_HEAD(&path) );
4667 init_tb_struct (th, &tb, th->t_super, &path, - (IH_SIZE + item_len));
4668 }
4669+ quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)) ;
4670
4671 retval = fix_nodes (M_DELETE, &tb, NULL, 0);
4672 if (retval == REPEAT_SEARCH) {
4673@@ -1380,6 +1429,12 @@
4674
4675 if (retval == CARRY_ON) {
4676 do_balance (&tb, 0, 0, M_DELETE);
4677+ if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */
4678+#ifdef REISERQUOTA_DEBUG
4679+ printk(KERN_DEBUG "reiserquota delete_solid_item(): freeing %u id=%u type=%c\n", quota_cut_bytes, inode->i_uid, key2type(key));
4680+#endif
4681+ DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes);
4682+ }
4683 break;
4684 }
4685
4686@@ -1412,7 +1467,7 @@
4687 }
4688 /* USE_INODE_GENERATION_COUNTER */
4689 #endif
4690- reiserfs_delete_solid_item (th, INODE_PKEY (inode));
4691+ reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode));
4692 }
4693
4694
4695@@ -1484,6 +1539,38 @@
4696 mark_inode_dirty (inode);
4697 }
4698
4699+static void
4700+unmap_buffers(struct page *page, loff_t pos) {
4701+ struct buffer_head *bh ;
4702+ struct buffer_head *head ;
4703+ struct buffer_head *next ;
4704+ unsigned long tail_index ;
4705+ unsigned long cur_index ;
4706+
4707+ if (!page || !page->buffers)
4708+ return;
4709+
4710+ tail_index = pos & (PAGE_CACHE_SIZE - 1) ;
4711+ cur_index = 0 ;
4712+ head = page->buffers ;
4713+ bh = head ;
4714+ do {
4715+ next = bh->b_this_page ;
4716+
4717+ /* we want to unmap the buffers that contain the tail, and
4718+ ** all the buffers after it (since the tail must be at the
4719+ ** end of the file). We don't want to unmap file data
4720+ ** before the tail, since it might be dirty and waiting to
4721+ ** reach disk
4722+ */
4723+ cur_index += bh->b_size ;
4724+ if (cur_index > tail_index) {
4725+ reiserfs_unmap_buffer(bh) ;
4726+ }
4727+ bh = next ;
4728+ } while (bh != head) ;
4729+}
4730+
4731
4732 /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
4733 int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th,
4734@@ -1499,12 +1586,15 @@
4735 structure by using the init_tb_struct and fix_nodes functions.
4736 After that we can make tree balancing. */
4737 struct tree_balance s_cut_balance;
4738+ struct item_head *p_le_ih;
4739+ loff_t tail_pos = 0;
4740 int n_cut_size = 0, /* Amount to be cut. */
4741 n_ret_value = CARRY_ON,
4742 n_removed = 0, /* Number of the removed unformatted nodes. */
4743 n_is_inode_locked = 0;
4744 char c_mode; /* Mode of the balance. */
4745 int retval2 = -1;
4746+ int quota_cut_bytes;
4747
4748
4749 init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, n_cut_size);
4750@@ -1531,6 +1621,9 @@
4751 /* tail has been left in the unformatted node */
4752 return n_ret_value;
4753
4754+ if (n_is_inode_locked) {
4755+ printk("inode locked twice\n");
4756+ }
4757 n_is_inode_locked = 1;
4758
4759 /* removing of last unformatted node will change value we
4760@@ -1545,6 +1638,7 @@
4761 set_cpu_key_k_type (p_s_item_key, TYPE_INDIRECT);
4762 p_s_item_key->key_length = 4;
4763 n_new_file_size -= (n_new_file_size & (p_s_sb->s_blocksize - 1));
4764+ tail_pos = n_new_file_size;
4765 set_cpu_key_k_offset (p_s_item_key, n_new_file_size + 1);
4766 if ( search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ){
4767 print_block (PATH_PLAST_BUFFER (p_s_path), 3, PATH_LAST_POSITION (p_s_path) - 1, PATH_LAST_POSITION (p_s_path) + 1);
4768@@ -1592,23 +1686,27 @@
4769 RFALSE( c_mode == M_PASTE || c_mode == M_INSERT, "illegal mode");
4770
4771 /* Calculate number of bytes that need to be cut from the item. */
4772+ quota_cut_bytes = ( c_mode == M_DELETE ) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.insert_size[0];
4773 if (retval2 == -1)
4774 n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode);
4775 else
4776 n_ret_value = retval2;
4777-
4778- if ( c_mode == M_DELETE ) {
4779- struct item_head * p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path);
4780-
4781- if ( is_direct_le_ih (p_le_ih) && (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) {
4782- /* we delete first part of tail which was stored in direct
4783- item(s) */
4784+
4785+
4786+ /* For direct items, we only change the quota when deleting the last
4787+ ** item.
4788+ */
4789+ p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path);
4790+ if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) {
4791+ if (c_mode == M_DELETE &&
4792+ (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) {
4793 // FIXME: this is to keep 3.5 happy
4794 p_s_inode->u.reiserfs_i.i_first_direct_byte = U32_MAX;
4795- p_s_inode->i_blocks -= p_s_sb->s_blocksize / 512;
4796+ quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE ;
4797+ } else {
4798+ quota_cut_bytes = 0 ;
4799 }
4800 }
4801-
4802 #ifdef CONFIG_REISERFS_CHECK
4803 if (n_is_inode_locked) {
4804 struct item_head * le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path);
4805@@ -1642,7 +1740,12 @@
4806 ** deal with it here.
4807 */
4808 p_s_inode->u.reiserfs_i.i_flags &= ~i_pack_on_close_mask;
4809+ unmap_buffers(page, tail_pos);
4810 }
4811+#ifdef REISERQUOTA_DEBUG
4812+ printk(KERN_DEBUG "reiserquota cut_from_item(): freeing %u id=%u type=%c\n", quota_cut_bytes, p_s_inode->i_uid, '?');
4813+#endif
4814+ DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
4815 return n_ret_value;
4816 }
4817
4818@@ -1654,8 +1757,8 @@
4819
4820 set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), DOT_OFFSET);
4821 set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_DIRENTRY);
4822- reiserfs_delete_solid_item (th, INODE_PKEY (inode));
4823-
4824+ reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode));
4825+ reiserfs_update_sd(th, inode) ;
4826 set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), SD_OFFSET);
4827 set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_STAT_DATA);
4828 }
4829@@ -1681,6 +1784,7 @@
4830 n_new_file_size;/* New file size. */
4831 int n_deleted; /* Number of deleted or truncated bytes. */
4832 int retval;
4833+ int jbegin_count = th->t_blocks_allocated;
4834
4835 if ( ! (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) || S_ISLNK(p_s_inode->i_mode)) )
4836 return;
4837@@ -1760,17 +1864,14 @@
4838 ** sure the file is consistent before ending the current trans
4839 ** and starting a new one
4840 */
4841- if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
4842- int orig_len_alloc = th->t_blocks_allocated ;
4843+ if (journal_transaction_should_end(th, jbegin_count)) {
4844 decrement_counters_in_path(&s_search_path) ;
4845
4846 if (update_timestamps) {
4847 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME;
4848 }
4849 reiserfs_update_sd(th, p_s_inode) ;
4850-
4851- journal_end(th, p_s_inode->i_sb, orig_len_alloc) ;
4852- journal_begin(th, p_s_inode->i_sb, orig_len_alloc) ;
4853+ reiserfs_restart_transaction(th, jbegin_count) ;
4854 reiserfs_update_inode_transaction(p_s_inode) ;
4855 }
4856 } while ( n_file_size > ROUND_UP (n_new_file_size) &&
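This hunk folds the journal_end/journal_begin pair into reiserfs_restart_transaction, keeping the original jbegin_count so a long truncate is split across many small transactions instead of pinning one huge one. A toy model of what such a helper plausibly does (stub functions, not the kernel API):

#include <stdio.h>

struct handle { int blocks; unsigned long trans_id; };

static unsigned long next_trans_id = 1;

static void journal_begin(struct handle *th, int nblocks)
{
    th->blocks = nblocks;
    th->trans_id = next_trans_id++;
}

static void journal_end(struct handle *th) { th->blocks = 0; }

/* end the running transaction, immediately begin a fresh one with the
 * same reservation */
static void restart_transaction(struct handle *th, int jbegin_count)
{
    journal_end(th);
    journal_begin(th, jbegin_count);
}

int main(void)
{
    struct handle th;
    journal_begin(&th, 10);
    for (int chunk = 0; chunk < 3; chunk++)   /* one "chunk" of the truncate */
        restart_transaction(&th, 10);
    printf("finished in transaction %lu\n", th.trans_id);  /* prints 4 */
    return 0;
}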
4857@@ -1822,18 +1923,37 @@
4858 int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th,
4859 struct path * p_s_search_path, /* Path to the pasted item. */
4860 const struct cpu_key * p_s_key, /* Key to search for the needed item.*/
4861+ struct inode * inode, /* Inode item belongs to */
4862 const char * p_c_body, /* Pointer to the bytes to paste. */
4863 int n_pasted_size) /* Size of pasted bytes. */
4864 {
4865 struct tree_balance s_paste_balance;
4866 int retval;
4867+ int fs_gen;
4868+
4869+ fs_gen = get_generation(inode->i_sb) ;
4870+
4871+#ifdef REISERQUOTA_DEBUG
4872+ printk(KERN_DEBUG "reiserquota paste_into_item(): allocating %u id=%u type=%c\n", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key)));
4873+#endif
4874
4875+ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) {
4876+ pathrelse(p_s_search_path);
4877+ return -EDQUOT;
4878+ }
4879 init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size);
4880 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
4881 s_paste_balance.key = p_s_key->on_disk_key;
4882 #endif
4883-
4884- while ( (retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) == REPEAT_SEARCH ) {
4885+
4886+ /* DQUOT_* can schedule, must check before the fix_nodes */
4887+ if (fs_changed(fs_gen, inode->i_sb)) {
4888+ goto search_again;
4889+ }
4890+
4891+ while ((retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) ==
4892+ REPEAT_SEARCH ) {
4893+search_again:
4894 /* file system changed while we were in the fix_nodes */
4895 PROC_INFO_INC( th -> t_super, paste_into_item_restarted );
4896 retval = search_for_position_by_key (th->t_super, p_s_key, p_s_search_path);
4897@@ -1862,6 +1982,10 @@
4898 error_out:
4899 /* this also releases the path */
4900 unfix_nodes(&s_paste_balance);
4901+#ifdef REISERQUOTA_DEBUG
4902+ printk(KERN_DEBUG "reiserquota paste_into_item(): freeing %u id=%u type=%c\n", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key)));
4903+#endif
4904+ DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
4905 return retval ;
4906 }
4907
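reiserfs_paste_into_item now brackets the tree operation with a quota reservation: charge before touching the tree, bail out with -EDQUOT if the charge fails, and give the reservation back on the error path. A compact model of that bracketing, assuming POSIX EDQUOT and stand-in names for the DQUOT_* macros:

#include <errno.h>
#include <stdio.h>

static long quota_used;
static const long quota_limit = 1 << 20;

static int quota_alloc(long bytes)
{
    if (quota_used + bytes > quota_limit)
        return -EDQUOT;
    quota_used += bytes;
    return 0;
}

static void quota_free(long bytes) { quota_used -= bytes; }

static int paste_bytes(long n, int (*tree_op)(long))
{
    int err = quota_alloc(n);
    if (err)
        return err;          /* mirrors pathrelse() + return -EDQUOT */
    err = tree_op(n);
    if (err)
        quota_free(n);       /* mirrors the error_out: rollback */
    return err;
}

static int failing_op(long n) { (void)n; return -1; }

int main(void)
{
    paste_bytes(4096, failing_op);
    printf("quota used after failed paste: %ld\n", quota_used);  /* 0 */
    return 0;
}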
4908@@ -1871,23 +1995,45 @@
4909 struct path * p_s_path, /* Path to the inserted item. */
4910 const struct cpu_key * key,
4911 struct item_head * p_s_ih, /* Pointer to the item header to insert.*/
4912+ struct inode * inode,
4913 const char * p_c_body) /* Pointer to the bytes to insert. */
4914 {
4915 struct tree_balance s_ins_balance;
4916 int retval;
4917+ int fs_gen = 0 ;
4918+ int quota_bytes = 0 ;
4919
4920+ if (inode) { /* Do we count quotas for item? */
4921+ fs_gen = get_generation(inode->i_sb);
4922+ quota_bytes = ih_item_len(p_s_ih);
4923+
4924+ /* hack so the quota code doesn't have to guess if the file has
4925+ ** a tail, links are always tails, so there's no guessing needed
4926+ */
4927+ if (!S_ISLNK (inode->i_mode) && is_direct_le_ih(p_s_ih)) {
4928+ quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE ;
4929+ }
4930+#ifdef REISERQUOTA_DEBUG
4931+ printk(KERN_DEBUG "reiserquota insert_item(): allocating %u id=%u type=%c\n", quota_bytes, inode->i_uid, head2type(p_s_ih));
4932+#endif
4933+ /* We can't dirty inode here. It would be immediately written but
4934+ * appropriate stat item isn't inserted yet... */
4935+ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) {
4936+ pathrelse(p_s_path);
4937+ return -EDQUOT;
4938+ }
4939+ }
4940 init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih));
4941 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
4942 s_ins_balance.key = key->on_disk_key;
4943 #endif
4944-
4945- /*
4946- if (p_c_body == 0)
4947- n_zeros_num = ih_item_len(p_s_ih);
4948- */
4949- // le_key2cpu_key (&key, &(p_s_ih->ih_key));
4950+ /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
4951+ if (inode && fs_changed(fs_gen, inode->i_sb)) {
4952+ goto search_again;
4953+ }
4954
4955 while ( (retval = fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, p_c_body)) == REPEAT_SEARCH) {
4956+search_again:
4957 /* file system changed while we were in the fix_nodes */
4958 PROC_INFO_INC( th -> t_super, insert_item_restarted );
4959 retval = search_item (th->t_super, key, p_s_path);
4960@@ -1902,7 +2048,7 @@
4961 goto error_out;
4962 }
4963 }
4964-
4965+
4966 /* make balancing after all resources will be collected at a time */
4967 if ( retval == CARRY_ON ) {
4968 do_balance (&s_ins_balance, p_s_ih, p_c_body, M_INSERT);
4969@@ -1913,6 +2059,11 @@
4970 error_out:
4971 /* also releases the path */
4972 unfix_nodes(&s_ins_balance);
4973+#ifdef REISERQUOTA_DEBUG
4974+ printk(KERN_DEBUG "reiserquota insert_item(): freeing %u id=%u type=%c\n", quota_bytes, inode->i_uid, head2type(p_s_ih));
4975+#endif
4976+ if (inode)
4977+ DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes) ;
4978 return retval;
4979 }
4980
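Both the paste and insert paths above rely on the same retry idiom: DQUOT_* calls can sleep, so the filesystem generation is sampled first and, if it moved, control jumps into the fix_nodes retry loop to redo the search. A skeleton of that control flow, with stubs simulating one forced retry:

#include <stdio.h>

static int generation = 1;  /* bumped by concurrent tree changes */

static int retries;
static int fix_nodes(void) { return 0; }           /* 0: CARRY_ON */
static int research(void) { retries++; return 0; } /* redo the search */

static int insert_with_retry(int saved_gen)
{
    if (saved_gen != generation)   /* fs_changed(): cached path may be stale */
        goto search_again;

    while (fix_nodes() != 0) {
search_again:
        if (research() < 0)
            return -1;             /* path could not be re-found */
    }
    return 0;
}

int main(void)
{
    int gen = generation;
    generation++;                  /* something slept and the tree moved */
    insert_with_retry(gen);
    printf("searches redone: %d\n", retries);  /* prints 1 */
    return 0;
}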
4981diff -urN linux-2.4.22.org/fs/reiserfs/super.c linux-2.4.22/fs/reiserfs/super.c
4982--- linux-2.4.22.org/fs/reiserfs/super.c 2003-11-21 15:08:29.000000000 +0100
4983+++ linux-2.4.22/fs/reiserfs/super.c 2003-11-21 15:14:25.000000000 +0100
4984@@ -13,6 +13,9 @@
4985 #include <linux/locks.h>
4986 #include <linux/init.h>
4987
4988+EXPORT_SYMBOL(journal_begin) ;
4989+EXPORT_SYMBOL(journal_end) ;
4990+
4991 #define REISERFS_OLD_BLOCKSIZE 4096
4992 #define REISERFS_SUPER_MAGIC_STRING_OFFSET_NJ 20
4993
4994@@ -50,22 +53,28 @@
4995 static int reiserfs_remount (struct super_block * s, int * flags, char * data);
4996 static int reiserfs_statfs (struct super_block * s, struct statfs * buf);
4997
4998-static void reiserfs_write_super (struct super_block * s)
4999+static int reiserfs_sync_fs (struct super_block * s)
5000 {
5001+ struct reiserfs_transaction_handle th;
5002+ lock_kernel() ;
5003+ if (!(s->s_flags & MS_RDONLY)) {
5004+ journal_begin(&th, s, 1);
5005+ journal_end_sync(&th, s, 1);
5006+ s->s_dirt = 0;
5007+ }
5008+ unlock_kernel() ;
5009+ return 0;
5010+}
5011
5012- int dirty = 0 ;
5013- lock_kernel() ;
5014- if (!(s->s_flags & MS_RDONLY)) {
5015- dirty = flush_old_commits(s, 1) ;
5016- }
5017- s->s_dirt = dirty;
5018- unlock_kernel() ;
5019+static void reiserfs_write_super (struct super_block * s)
5020+{
5021+ reiserfs_sync_fs(s);
5022 }
5023
5024+
5025 static void reiserfs_write_super_lockfs (struct super_block * s)
5026 {
5027
5028- int dirty = 0 ;
5029 struct reiserfs_transaction_handle th ;
5030 lock_kernel() ;
5031 if (!(s->s_flags & MS_RDONLY)) {
5032@@ -75,7 +84,7 @@
5033 reiserfs_block_writes(&th) ;
5034 journal_end(&th, s, 1) ;
5035 }
5036- s->s_dirt = dirty;
5037+ s->s_dirt = 0;
5038 unlock_kernel() ;
5039 }
5040
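The rewritten write_super path reduces to one idea: an empty transaction committed synchronously is enough to push all earlier journal state to disk. In outline (journal_* here are stubs, not the kernel calls):

#include <stdio.h>

struct handle { int blocks; };

static void journal_begin(struct handle *th, int nblocks) { th->blocks = nblocks; }

static void journal_end_sync(struct handle *th)
{
    printf("synchronous commit, %d reserved block(s)\n", th->blocks);
}

static int sync_fs(int read_only)
{
    struct handle th;

    if (!read_only) {
        journal_begin(&th, 1);    /* nothing to log, just join the journal */
        journal_end_sync(&th);    /* ...and wait for the commit to hit disk */
    }
    return 0;                     /* s_dirt is cleared in the real code */
}

int main(void) { return sync_fs(0); }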
5041@@ -100,7 +109,7 @@
5042 /* we are going to do one balancing */
5043 journal_begin (&th, s, JOURNAL_PER_BALANCE_CNT);
5044
5045- reiserfs_delete_solid_item (&th, key);
5046+ reiserfs_delete_solid_item (&th, NULL, key);
5047 if (oid_free)
5048 /* removals are protected by direct items */
5049 reiserfs_release_objectid (&th, le32_to_cpu (key->k_objectid));
5050@@ -286,8 +295,8 @@
5051 /* body of "save" link */
5052 link = INODE_PKEY (inode)->k_dir_id;
5053
5054- /* put "save" link inot tree */
5055- retval = reiserfs_insert_item (th, &path, &key, &ih, (char *)&link);
5056+ /* put "save" link into tree, don't charge quota to anyone */
5057+ retval = reiserfs_insert_item (th, &path, &key, &ih, NULL, (char *)&link);
5058 if (retval) {
5059 if (retval != -ENOSPC)
5060 reiserfs_warning (inode->i_sb, "vs-2120: add_save_link: insert_item returned %d\n",
5061@@ -329,7 +338,8 @@
5062 ( inode -> u.reiserfs_i.i_flags & i_link_saved_truncate_mask ) ) ||
5063 ( !truncate &&
5064 ( inode -> u.reiserfs_i.i_flags & i_link_saved_unlink_mask ) ) )
5065- reiserfs_delete_solid_item (&th, &key);
5066+ /* don't take quota bytes from anywhere */
5067+ reiserfs_delete_solid_item (&th, NULL, &key);
5068 if (!truncate) {
5069 reiserfs_release_objectid (&th, inode->i_ino);
5070 inode -> u.reiserfs_i.i_flags &= ~i_link_saved_unlink_mask;
5071@@ -357,6 +367,7 @@
5072 ** to do a journal_end
5073 */
5074 journal_release(&th, s) ;
5075+ s->s_dirt = 0;
5076
5077 for (i = 0; i < SB_BMAP_NR (s); i ++)
5078 brelse (SB_AP_BITMAP (s)[i].bh);
5079@@ -418,6 +429,7 @@
5080 put_super: reiserfs_put_super,
5081 write_super: reiserfs_write_super,
5082 write_super_lockfs: reiserfs_write_super_lockfs,
5083+ sync_fs: reiserfs_sync_fs,
5084 unlockfs: reiserfs_unlockfs,
5085 statfs: reiserfs_statfs,
5086 remount_fs: reiserfs_remount,
5087@@ -463,6 +475,14 @@
5088 {NULL, 0, 0}
5089 };
5090
5091+/* possible values for -o data= */
5092+static const arg_desc_t logging_mode[] = {
5093+ {"ordered", 1<<REISERFS_DATA_ORDERED, (1<<REISERFS_DATA_LOG|1<<REISERFS_DATA_WRITEBACK)},
5094+ {"journal", 1<<REISERFS_DATA_LOG, (1<<REISERFS_DATA_ORDERED|1<<REISERFS_DATA_WRITEBACK)},
5095+ {"writeback", 1<<REISERFS_DATA_WRITEBACK, (1<<REISERFS_DATA_ORDERED|1<<REISERFS_DATA_LOG)},
5096+ {NULL, 0}
5097+};
5098+
5099
5100 /* possible values for "-o block-allocator=" and bits which are to be set in
5101 s_mount_opt of reiserfs specific part of in-core super block */
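Each entry in the new logging_mode[] table pairs a data= value with the bit it sets and the competing bits it excludes, keeping the three modes mutually exclusive. A self-contained model of that table-driven parse (field names only loosely mirror the patch):

#include <stdio.h>
#include <string.h>

struct arg_desc {
    const char *value;
    unsigned set;
    unsigned conflict;
};

enum { DATA_ORDERED = 1 << 0, DATA_LOG = 1 << 1, DATA_WRITEBACK = 1 << 2 };

static const struct arg_desc logging_mode[] = {
    { "ordered",   DATA_ORDERED,   DATA_LOG | DATA_WRITEBACK },
    { "journal",   DATA_LOG,       DATA_ORDERED | DATA_WRITEBACK },
    { "writeback", DATA_WRITEBACK, DATA_ORDERED | DATA_LOG },
    { NULL, 0, 0 }
};

static int apply_data_mode(unsigned *opts, const char *value)
{
    const struct arg_desc *d;

    for (d = logging_mode; d->value; d++) {
        if (strcmp(d->value, value) == 0) {
            *opts &= ~d->conflict;  /* clear the competing mode bits */
            *opts |= d->set;
            return 0;
        }
    }
    return -1;                      /* unknown data= value */
}

int main(void)
{
    unsigned opts = 0;
    apply_data_mode(&opts, "journal");
    apply_data_mode(&opts, "ordered");  /* later option wins */
    printf("opts=%#x\n", opts);         /* prints opts=0x1 */
    return 0;
}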
5102@@ -612,10 +632,14 @@
5103
5104 {"block-allocator", 'a', balloc, 0, 0},
5105 {"hash", 'h', hash, 1<<FORCE_HASH_DETECT, 0},
5106+ {"data", 'd', logging_mode, 0, 0},
5107
5108 {"resize", 'r', 0, 0, 0},
5109 {"attrs", 0, 0, 1<<REISERFS_ATTRS, 0},
5110 {"noattrs", 0, 0, 0, 1<<REISERFS_ATTRS},
5111+ {"usrquota", 0, 0, 0, 0},
5112+ {"grpquota", 0, 0, 0, 0},
5113+
5114 {NULL, 0, 0, 0, 0}
5115 };
5116
5117@@ -672,6 +696,47 @@
5118 }
5119 }
5120
5121+static void switch_data_mode(struct super_block *s, unsigned long mode) {
5122+ struct reiserfs_transaction_handle th;
5123+ int sync_all = !reiserfs_data_log(s);
5124+
5125+ journal_begin(&th, s, 1);
5126+ SB_JOURNAL(s)->j_must_wait = 1;
5127+ journal_end_sync(&th, s, 1);
5128+
5129+ s->u.reiserfs_sb.s_mount_opt &= ~((1 << REISERFS_DATA_LOG) |
5130+ (1 << REISERFS_DATA_ORDERED) |
5131+ (1 << REISERFS_DATA_WRITEBACK));
5132+ s->u.reiserfs_sb.s_mount_opt |= (1 << mode);
5133+
5134+ journal_begin(&th, s, 1);
5135+ SB_JOURNAL(s)->j_must_wait = 1;
5136+ journal_end_sync(&th, s, 1);
5137+
5138+ if (sync_all)
5139+ fsync_no_super(s->s_dev);
5140+}
5141+
5142+static void handle_data_mode(struct super_block *s, unsigned long mount_options)
5143+{
5144+ if (mount_options & (1 << REISERFS_DATA_LOG)) {
5145+ if (!reiserfs_data_log(s)) {
5146+ switch_data_mode(s, REISERFS_DATA_LOG);
5147+ printk("reiserfs: switching to journaled data mode\n");
5148+ }
5149+ } else if (mount_options & (1 << REISERFS_DATA_ORDERED)) {
5150+ if (!reiserfs_data_ordered(s)) {
5151+ switch_data_mode(s, REISERFS_DATA_ORDERED);
5152+ printk("reiserfs: switching to ordered data mode\n");
5153+ }
5154+ } else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) {
5155+ if (!reiserfs_data_writeback(s)) {
5156+ switch_data_mode(s, REISERFS_DATA_WRITEBACK);
5157+ printk("reiserfs: switching to writeback data mode\n");
5158+ }
5159+ }
5160+}
5161+
5162 static int reiserfs_remount (struct super_block * s, int * mount_flags, char * data)
5163 {
5164 struct reiserfs_super_block * rs;
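switch_data_mode above puts a synchronous barrier commit on both sides of flipping the mount-option bits, so no transaction spans two data-journaling modes, and flushes remaining data blocks when leaving a non-journaled-data mode. Roughly, with stand-in names:

#include <stdio.h>

enum { DATA_LOG = 1, DATA_ORDERED = 2, DATA_WRITEBACK = 4 };

static void sync_commit(const char *why) { printf("barrier: %s\n", why); }

static void switch_data_mode(unsigned *opts, unsigned mode)
{
    int was_log = (*opts & DATA_LOG) != 0;

    sync_commit("drain transactions begun in the old mode");
    *opts &= ~(DATA_LOG | DATA_ORDERED | DATA_WRITEBACK);
    *opts |= mode;
    sync_commit("start clean in the new mode");

    if (!was_log)   /* old mode never logged data: flush it now */
        printf("fsync remaining data blocks\n");
}

int main(void)
{
    unsigned opts = DATA_ORDERED;
    switch_data_mode(&opts, DATA_LOG);
    return 0;
}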
5165@@ -723,9 +788,10 @@
5166 s->s_dirt = 0;
5167 } else {
5168 /* remount read-write */
5169- if (!(s->s_flags & MS_RDONLY))
5170+ if (!(s->s_flags & MS_RDONLY)) {
5171+ handle_data_mode(s, mount_options);
5172 return 0; /* We are read-write already */
5173-
5174+ }
5175 s->s_flags &= ~MS_RDONLY ; /* now it is safe to call journal_begin */
5176 journal_begin(&th, s, 10) ;
5177
5178@@ -743,9 +809,10 @@
5179 SB_JOURNAL(s)->j_must_wait = 1 ;
5180 journal_end(&th, s, 10) ;
5181
5182- if (!( *mount_flags & MS_RDONLY ) )
5183+ if (!( *mount_flags & MS_RDONLY ) ) {
5184 finish_unfinished( s );
5185-
5186+ handle_data_mode(s, mount_options);
5187+ }
5188 return 0;
5189 }
5190
5191@@ -1172,9 +1239,6 @@
5192
5193 if (reiserfs_parse_options (s, (char *) data, &(s->u.reiserfs_sb.s_mount_opt), &blocks) == 0) {
5194 return NULL;
5195-
5196-
5197-
5198 }
5199
5200 if (blocks) {
5201@@ -1222,9 +1286,22 @@
5202 printk("reiserfs:warning: - it is slow mode for debugging.\n");
5203 #endif
5204
5205- /* fixme */
5206- jdev_name = NULL;
5207+ /* make data=ordered the default */
5208+ if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) &&
5209+ !reiserfs_data_writeback(s))
5210+ {
5211+ s->u.reiserfs_sb.s_mount_opt |= (1 << REISERFS_DATA_ORDERED);
5212+ }
5213+
5214+ if (reiserfs_data_log(s)) {
5215+ printk("reiserfs: using journaled data mode\n");
5216+ } else if (reiserfs_data_ordered(s)) {
5217+ printk("reiserfs: using ordered data mode\n");
5218+ } else {
5219+ printk("reiserfs: using writeback data mode\n");
5220+ }
5221
5222+ jdev_name = NULL;
5223 if( journal_init(s, jdev_name, old_format) ) {
5224 reiserfs_warning(s, "sh-2022: reiserfs_read_super: unable to initialize journal space\n") ;
5225 goto error ;
5226@@ -1364,16 +1441,19 @@
5227
5228 static int __init init_reiserfs_fs (void)
5229 {
5230+ int ret;
5231 reiserfs_proc_info_global_init();
5232 reiserfs_proc_register_global( "version",
5233 reiserfs_global_version_in_proc );
5234+ ret = reiserfs_journal_cache_init();
5235+ if (ret)
5236+ return ret;
5237 return register_filesystem(&reiserfs_fs_type);
5238 }
5239
5240 MODULE_DESCRIPTION("ReiserFS journaled filesystem");
5241 MODULE_AUTHOR("Hans Reiser <reiser@namesys.com>");
5242 MODULE_LICENSE("GPL");
5243-EXPORT_NO_SYMBOLS;
5244
5245 static void __exit exit_reiserfs_fs(void)
5246 {
5247diff -urN linux-2.4.22.org/fs/reiserfs/tail_conversion.c linux-2.4.22/fs/reiserfs/tail_conversion.c
5248--- linux-2.4.22.org/fs/reiserfs/tail_conversion.c 2003-11-21 15:08:29.000000000 +0100
5249+++ linux-2.4.22/fs/reiserfs/tail_conversion.c 2003-11-21 15:14:25.000000000 +0100
5250@@ -66,11 +66,11 @@
5251 set_ih_free_space (&ind_ih, 0); /* delete at nearest future */
5252 put_ih_item_len( &ind_ih, UNFM_P_SIZE );
5253 PATH_LAST_POSITION (path)++;
5254- n_retval = reiserfs_insert_item (th, path, &end_key, &ind_ih,
5255+ n_retval = reiserfs_insert_item (th, path, &end_key, &ind_ih, inode,
5256 (char *)&unfm_ptr);
5257 } else {
5258 /* Paste into last indirect item of an object. */
5259- n_retval = reiserfs_paste_into_item(th, path, &end_key,
5260+ n_retval = reiserfs_paste_into_item(th, path, &end_key, inode,
5261 (char *)&unfm_ptr, UNFM_P_SIZE);
5262 }
5263 if ( n_retval ) {
5264@@ -152,39 +152,6 @@
5265 }
5266 }
5267
5268-static void
5269-unmap_buffers(struct page *page, loff_t pos) {
5270- struct buffer_head *bh ;
5271- struct buffer_head *head ;
5272- struct buffer_head *next ;
5273- unsigned long tail_index ;
5274- unsigned long cur_index ;
5275-
5276- if (page) {
5277- if (page->buffers) {
5278- tail_index = pos & (PAGE_CACHE_SIZE - 1) ;
5279- cur_index = 0 ;
5280- head = page->buffers ;
5281- bh = head ;
5282- do {
5283- next = bh->b_this_page ;
5284-
5285- /* we want to unmap the buffers that contain the tail, and
5286- ** all the buffers after it (since the tail must be at the
5287- ** end of the file). We don't want to unmap file data
5288- ** before the tail, since it might be dirty and waiting to
5289- ** reach disk
5290- */
5291- cur_index += bh->b_size ;
5292- if (cur_index > tail_index) {
5293- reiserfs_unmap_buffer(bh) ;
5294- }
5295- bh = next ;
5296- } while (bh != head) ;
5297- }
5298- }
5299-}
5300-
5301 /* this first locks inode (neither reads nor sync are permitted),
5302 reads tail through page cache, insert direct item. When direct item
5303 inserted successfully inode is left locked. Return value is always
5304@@ -261,7 +228,7 @@
5305 set_cpu_key_k_type (&key, TYPE_DIRECT);
5306 key.key_length = 4;
5307 /* Insert tail as new direct item in the tree */
5308- if ( reiserfs_insert_item(th, p_s_path, &key, &s_ih,
5309+ if ( reiserfs_insert_item(th, p_s_path, &key, &s_ih, p_s_inode,
5310 tail ? tail : NULL) < 0 ) {
5311 /* No disk memory. So we can not convert last unformatted node
5312 to the direct item. In this case we used to adjust
5313@@ -274,10 +241,8 @@
5314 }
5315 kunmap(page) ;
5316
5317- /* this will invalidate all the buffers in the page after
5318- ** pos1
5319- */
5320- unmap_buffers(page, pos1) ;
5321+ /* make sure to get the i_blocks changes from reiserfs_insert_item */
5322+ reiserfs_update_sd(th, p_s_inode);
5323
5324 // note: we have now the same as in above direct2indirect
5325 // conversion: there are two keys which have matching first three
5326@@ -285,7 +250,6 @@
5327
5328 /* We have inserted new direct item and must remove last
5329 unformatted node. */
5330- p_s_inode->i_blocks += (p_s_sb->s_blocksize / 512);
5331 *p_c_mode = M_CUT;
5332
5333 /* we store position of first direct item in the in-core inode */
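The unmap_buffers loop deleted here (its caller moved into reiserfs_cut_from_item earlier in the patch, so the definition presumably lives elsewhere now) walks the page's circular ring of buffer heads and unmaps every buffer at or past the tail, leaving possibly-dirty file data before it untouched. A userspace rendition of that walk:

#include <stdio.h>

struct buffer {
    struct buffer *next;   /* b_this_page: circular */
    unsigned size;         /* b_size */
    int mapped;
};

static void unmap_past_tail(struct buffer *head, unsigned long tail_index)
{
    struct buffer *bh = head;
    unsigned long cur = 0;

    do {
        struct buffer *next = bh->next;
        cur += bh->size;
        if (cur > tail_index)    /* overlaps or follows the tail: unmap */
            bh->mapped = 0;
        bh = next;
    } while (bh != head);
}

int main(void)
{
    struct buffer b[4];
    for (int i = 0; i < 4; i++)
        b[i] = (struct buffer){ &b[(i + 1) % 4], 1024, 1 };

    unmap_past_tail(&b[0], 2048 & (4096 - 1));  /* tail starts in buffer 2 */
    for (int i = 0; i < 4; i++)
        printf("buffer %d mapped=%d\n", i, b[i].mapped);
    return 0;
}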
5334diff -urN linux-2.4.22.org/include/linux/fs.h linux-2.4.22/include/linux/fs.h
5335--- linux-2.4.22.org/include/linux/fs.h 2003-11-21 15:08:34.000000000 +0100
5336+++ linux-2.4.22/include/linux/fs.h 2003-11-21 15:14:25.000000000 +0100
5337@@ -1222,6 +1222,8 @@
5338 return test_and_set_bit(BH_Dirty, &bh->b_state);
5339 }
5340
5341+extern void buffer_insert_list_journal_head(struct buffer_head *bh, struct list_head *list, void *journal_head);
5342+
5343 static inline void mark_buffer_async(struct buffer_head * bh, int on)
5344 {
5345 if (on)
5346@@ -1508,6 +1510,7 @@
5347 /* Generic buffer handling for block filesystems.. */
5348 extern int try_to_release_page(struct page * page, int gfp_mask);
5349 extern int discard_bh_page(struct page *, unsigned long, int);
5350+extern void discard_buffer(struct buffer_head *bh) ;
5351 #define block_flushpage(page, offset) discard_bh_page(page, offset, 1)
5352 #define block_invalidate_page(page) discard_bh_page(page, 0, 0)
5353 extern int block_symlink(struct inode *, const char *, int);
5354diff -urN linux-2.4.22.org/include/linux/reiserfs_fs.h linux-2.4.22/include/linux/reiserfs_fs.h
5355--- linux-2.4.22.org/include/linux/reiserfs_fs.h 2003-11-21 15:08:34.000000000 +0100
5356+++ linux-2.4.22/include/linux/reiserfs_fs.h 2003-11-21 15:14:25.000000000 +0100
5357@@ -266,6 +266,7 @@
5358 #define NO_DISK_SPACE -3
5359 #define NO_BALANCING_NEEDED (-4)
5360 #define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5)
5361+#define QUOTA_EXCEEDED -6
5362
5363 typedef unsigned long b_blocknr_t;
5364 typedef __u32 unp_t;
5365@@ -1329,8 +1330,7 @@
5366 #define fs_generation(s) ((s)->u.reiserfs_sb.s_generation_counter)
5367 #define get_generation(s) atomic_read (&fs_generation(s))
5368 #define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen)
5369-#define fs_changed(gen,s) (gen != get_generation (s))
5370-
5371+#define fs_changed(gen,s) (gen != get_generation(s))
5372
5373 /***************************************************************************/
5374 /* FIXATE NODES */
5375@@ -1653,6 +1653,86 @@
5376 /* 12 */ struct journal_params jh_journal;
5377 } ;
5378
5379+static inline int
5380+reiserfs_file_data_log(struct inode *inode) {
5381+ if (reiserfs_data_log(inode->i_sb) ||
5382+ (inode->u.reiserfs_i.i_flags & i_data_log))
5383+ {
5384+ return 1 ;
5385+ }
5386+ return 0 ;
5387+}
5388+
5389+/* flags for the nested transaction handle */
5390+#define REISERFS_PERSISTENT_HANDLE 1
5391+#define REISERFS_ACTIVE_HANDLE 2
5392+#define REISERFS_CLOSE_NESTED 4
5393+#define REISERFS_DANGLING_HANDLE 8
5394+/*
5395+** transaction handle which is passed around for all journal calls
5396+*/
5397+struct reiserfs_transaction_handle {
5398+ struct super_block *t_super ; /* super for this FS when journal_begin was
5399+ called. saves calls to reiserfs_get_super
5400+ also used by nested transactions to make
5401+ sure they are nesting on the right FS
5402+ _must_ be first in the handle
5403+ */
5404+ int t_refcount;
5405+ int t_blocks_logged ; /* number of blocks this writer has logged */
5406+ int t_blocks_allocated ; /* number of blocks this writer allocated */
5407+ unsigned long t_trans_id ; /* sanity check, equals the current trans id */
5408+ int t_flags ;
5409+ void *t_handle_save ; /* save existing current->journal_info */
5410+ int displace_new_blocks:1; /* if new block allocation occurs, that
5411+ block should be displaced from others */
5412+} ;
5413+
5414+static inline int
5415+reiserfs_dangling_handle(struct reiserfs_transaction_handle *th) {
5416+ return (th && (th->t_flags & REISERFS_DANGLING_HANDLE)) ;
5417+}
5418+
5419+static inline void
5420+reiserfs_set_handle_dangling(struct reiserfs_transaction_handle *th) {
5421+ th->t_flags |= REISERFS_DANGLING_HANDLE ;
5422+}
5423+
5424+static inline void
5425+reiserfs_clear_handle_dangling(struct reiserfs_transaction_handle *th) {
5426+ th->t_flags &= ~REISERFS_DANGLING_HANDLE ;
5427+}
5428+
5429+static inline int
5430+reiserfs_persistent_handle(struct reiserfs_transaction_handle *th) {
5431+ return (th && (th->t_flags & REISERFS_PERSISTENT_HANDLE)) ;
5432+}
5433+
5434+static inline void
5435+reiserfs_set_handle_persistent(struct reiserfs_transaction_handle *th) {
5436+ th->t_flags |= REISERFS_PERSISTENT_HANDLE ;
5437+}
5438+
5439+static inline int
5440+reiserfs_active_handle(struct reiserfs_transaction_handle *th) {
5441+ return (th && (th->t_flags & REISERFS_ACTIVE_HANDLE)) ;
5442+}
5443+
5444+static inline void
5445+reiserfs_set_handle_active(struct reiserfs_transaction_handle *th) {
5446+ th->t_flags |= REISERFS_ACTIVE_HANDLE ;
5447+}
5448+
5449+static inline int
5450+reiserfs_restartable_handle(struct reiserfs_transaction_handle *th) {
5451+ return (th && (th->t_flags & REISERFS_CLOSE_NESTED)) ;
5452+}
5453+
5454+static inline void
5455+reiserfs_set_handle_restartable(struct reiserfs_transaction_handle *th) {
5456+ th->t_flags |= REISERFS_CLOSE_NESTED ;
5457+}
5458+
5459 extern task_queue reiserfs_commit_thread_tq ;
5460 extern wait_queue_head_t reiserfs_commit_thread_wait ;
5461
5462@@ -1693,6 +1773,8 @@
5463 */
5464 #define JOURNAL_BUFFER(j,n) ((j)->j_ap_blocks[((j)->j_start + (n)) % JOURNAL_BLOCK_COUNT])
5465
5466+int reiserfs_journal_cache_init(void);
5467+int reiserfs_flush_old_commits(struct super_block *);
5468 void reiserfs_commit_for_inode(struct inode *) ;
5469 void reiserfs_commit_for_tail(struct inode *) ;
5470 void reiserfs_update_inode_transaction(struct inode *) ;
5471@@ -1701,6 +1783,18 @@
5472 void reiserfs_block_writes(struct reiserfs_transaction_handle *th) ;
5473 void reiserfs_allow_writes(struct super_block *s) ;
5474 void reiserfs_check_lock_depth(char *caller) ;
5475+int journal_mark_dirty(struct reiserfs_transaction_handle *,
5476+ struct super_block *, struct buffer_head *bh) ;
5477+
5478+static inline int reiserfs_transaction_running(struct super_block *s) {
5479+ struct reiserfs_transaction_handle *th = current->journal_info ;
5480+ if (th && th->t_super == s)
5481+ return 1 ;
5482+ if (th && th->t_super == NULL)
5483+ BUG();
5484+ return 0 ;
5485+}
5486+
5487 void reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, int wait) ;
5488 void reiserfs_restore_prepared_buffer(struct super_block *, struct buffer_head *bh) ;
5489 struct buffer_head * journal_bread (struct super_block *s, int block);
5490@@ -1716,8 +1810,14 @@
5491 int push_journal_writer(char *w) ;
5492 int pop_journal_writer(int windex) ;
5493 int journal_transaction_should_end(struct reiserfs_transaction_handle *, int) ;
5494+int reiserfs_restart_transaction(struct reiserfs_transaction_handle *, int) ;
5495 int reiserfs_in_journal(struct super_block *p_s_sb, kdev_t dev, int bmap_nr, int bit_nr, int size, int searchall, unsigned int *next) ;
5496 int journal_begin(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ;
5497+
5498+/* allocates a transaction handle, and starts a new transaction it */
5499+struct reiserfs_transaction_handle *
5500+reiserfs_persistent_transaction(struct super_block *p_s_sb, unsigned long) ;
5501+
5502 struct super_block *reiserfs_get_super(kdev_t dev) ;
5503 void flush_async_commits(struct super_block *p_s_sb) ;
5504
5505@@ -1833,11 +1933,13 @@
5506 int reiserfs_insert_item (struct reiserfs_transaction_handle *th,
5507 struct path * path,
5508 const struct cpu_key * key,
5509- struct item_head * ih, const char * body);
5510+ struct item_head * ih,
5511+ struct inode *inode, const char * body);
5512
5513 int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th,
5514 struct path * path,
5515 const struct cpu_key * key,
5516+ struct inode *inode,
5517 const char * body, int paste_size);
5518
5519 int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th,
5520@@ -1854,7 +1956,7 @@
5521 struct buffer_head * p_s_un_bh);
5522
5523 void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th,
5524- struct key * key);
5525+ struct inode *inode, struct key * key);
5526 void reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * p_s_inode);
5527 void reiserfs_do_truncate (struct reiserfs_transaction_handle *th,
5528 struct inode * p_s_inode, struct page *,
5529@@ -1895,8 +1997,18 @@
5530 int i_size,
5531 struct dentry *dentry,
5532 struct inode *inode);
5533-int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode);
5534-void reiserfs_update_sd (struct reiserfs_transaction_handle *th, struct inode * inode);
5535+
5536+int reiserfs_sync_inode (struct reiserfs_transaction_handle *th,
5537+ struct inode * inode);
5538+
5539+void reiserfs_update_sd_size (struct reiserfs_transaction_handle *th,
5540+ struct inode * inode, loff_t size);
5541+
5542+static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th,
5543+ struct inode *inode)
5544+{
5545+ reiserfs_update_sd_size(th, inode, inode->i_size) ;
5546+}
5547
5548 void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode );
5549 void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs );
5550@@ -1981,7 +2093,7 @@
5551 extern struct inode_operations reiserfs_file_inode_operations;
5552 extern struct file_operations reiserfs_file_operations;
5553 extern struct address_space_operations reiserfs_address_space_operations ;
5554-int get_new_buffer (struct reiserfs_transaction_handle *th, struct buffer_head *,
5555+int get_new_buffer (struct reiserfs_transaction_handle *th, struct inode *, struct buffer_head *,
5556 struct buffer_head **, struct path *);
5557
5558
5559@@ -2095,7 +2207,7 @@
5560
5561 int reiserfs_parse_alloc_options (struct super_block *, char *);
5562 int is_reusable (struct super_block * s, unsigned long block, int bit_value);
5563-void reiserfs_free_block (struct reiserfs_transaction_handle *th, unsigned long);
5564+void reiserfs_free_block (struct reiserfs_transaction_handle *th, struct inode *inode, unsigned long, int);
5565 int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t * , int, int);
5566 extern inline int reiserfs_new_form_blocknrs (struct tree_balance * tb,
5567 b_blocknr_t *new_blocknrs, int amount_needed)
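The relocated transaction handle gains t_refcount, t_handle_save, and per-handle flags, which together point at nested transactions: an inner journal_begin on the same filesystem can ride the outer handle, and only the outermost end commits. One plausible model of that refcounting (not the patch's actual journal code):

#include <assert.h>
#include <stdio.h>

struct handle { int refcount; };

static struct handle *current_handle;   /* models current->journal_info */

static struct handle *begin(struct handle *h)
{
    if (current_handle) {               /* nest on the running transaction */
        current_handle->refcount++;
        return current_handle;
    }
    h->refcount = 1;
    current_handle = h;
    return h;
}

static int end(struct handle *h)
{
    assert(h == current_handle && h->refcount > 0);
    if (--h->refcount)
        return 0;                       /* inner end: nothing to do yet */
    current_handle = NULL;
    return 1;                           /* outermost end: commit here */
}

int main(void)
{
    struct handle th;
    struct handle *h = begin(&th);
    begin(&th);                         /* nested begin */
    printf("inner end commits? %d\n", end(h));   /* 0 */
    printf("outer end commits? %d\n", end(h));   /* 1 */
    return 0;
}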
5568diff -urN linux-2.4.22.org/include/linux/reiserfs_fs_i.h linux-2.4.22/include/linux/reiserfs_fs_i.h
5569--- linux-2.4.22.org/include/linux/reiserfs_fs_i.h 2003-11-21 15:08:34.000000000 +0100
5570+++ linux-2.4.22/include/linux/reiserfs_fs_i.h 2003-11-21 15:14:25.000000000 +0100
5571@@ -6,6 +6,8 @@
5572
5573 #include <linux/list.h>
5574
5575+struct reiserfs_journal_list;
5576+
5577 /** bitmasks for i_flags field in reiserfs-specific part of inode */
5578 typedef enum {
5579 /** this says what format of key do all items (but stat data) of
5580@@ -23,7 +25,9 @@
5581 truncate or unlink. Safe link is used to avoid leakage of disk
5582 space on crash with some files open, but unlinked. */
5583 i_link_saved_unlink_mask = 0x0010,
5584- i_link_saved_truncate_mask = 0x0020
5585+ i_link_saved_truncate_mask = 0x0020,
5586+ /** are we logging data blocks for this file? */
5587+ i_data_log = 0x0040,
5588 } reiserfs_inode_flags;
5589
5590
5591@@ -52,14 +56,14 @@
5592 ** needs to be committed in order for this inode to be properly
5593 ** flushed */
5594 unsigned long i_trans_id ;
5595- unsigned long i_trans_index ;
5596+ struct reiserfs_journal_list *i_jl;
5597
5598 /* direct io needs to make sure the tail is on disk to avoid
5599 * buffer alias problems. This records the transaction last
5600 * involved in a direct->indirect conversion for this file
5601 */
5602 unsigned long i_tail_trans_id;
5603- unsigned long i_tail_trans_index;
5604+ struct reiserfs_journal_list *i_tail_jl;
5605 };
5606
5607 #endif
5608diff -urN linux-2.4.22.org/include/linux/reiserfs_fs_sb.h linux-2.4.22/include/linux/reiserfs_fs_sb.h
5609--- linux-2.4.22.org/include/linux/reiserfs_fs_sb.h 2003-11-21 15:08:34.000000000 +0100
5610+++ linux-2.4.22/include/linux/reiserfs_fs_sb.h 2003-11-21 15:14:25.000000000 +0100
5611@@ -120,7 +120,6 @@
5612 #define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */
5613 #define JOURNAL_HASH_SIZE 8192
5614 #define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */
5615-#define JOURNAL_LIST_COUNT 64
5616
5617 /* these are bh_state bit flag offset numbers, for use in the buffer head */
5618
5619@@ -167,20 +166,27 @@
5620 struct reiserfs_bitmap_node **bitmaps ;
5621 } ;
5622
5623-/*
5624-** transaction handle which is passed around for all journal calls
5625-*/
5626-struct reiserfs_transaction_handle {
5627- /* ifdef it. -Hans */
5628- char *t_caller ; /* debugging use */
5629- int t_blocks_logged ; /* number of blocks this writer has logged */
5630- int t_blocks_allocated ; /* number of blocks this writer allocated */
5631- unsigned long t_trans_id ; /* sanity check, equals the current trans id */
5632- struct super_block *t_super ; /* super for this FS when journal_begin was
5633- called. saves calls to reiserfs_get_super */
5634- int displace_new_blocks:1; /* if new block allocation occurres, that block
5635- should be displaced from others */
5636-} ;
5637+struct reiserfs_journal_list;
5638+
5639+/* so, we're using fsync_buffers_list to do the ordered buffer writes,
5640+ * but we don't want to have a full inode on each buffer list, it is
5641+ * a big waste of space.
5642+ *
5643+ * instead we copy the very head of the inode into a list here, a kludge
5644+ * but much smaller.
5645+ */
5646+struct reiserfs_inode_list {
5647+ struct list_head i_hash;
5648+ struct list_head i_list;
5649+ struct list_head i_dentry;
5650+ struct list_head i_dirty_buffers;
5651+
5652+ /* we could be very smart and do math based on the location
5653+ * of the inode list in the journal list struct.
5654+ * let's do that after this works properly
5655+ */
5656+ struct reiserfs_journal_list *jl;
5657+};
5658
5659 /*
5660 ** one of these for each transaction. The most important part here is the j_realblock.
5661@@ -190,20 +196,32 @@
5662 ** to be overwritten */
5663 struct reiserfs_journal_list {
5664 unsigned long j_start ;
5665+ unsigned long j_state ;
5666 unsigned long j_len ;
5667 atomic_t j_nonzerolen ;
5668 atomic_t j_commit_left ;
5669- atomic_t j_flushing ;
5670- atomic_t j_commit_flushing ;
5671 atomic_t j_older_commits_done ; /* all commits older than this on disk*/
5672+ struct semaphore j_commit_lock ;
5673 unsigned long j_trans_id ;
5674 time_t j_timestamp ;
5675 struct reiserfs_list_bitmap *j_list_bitmap ;
5676 struct buffer_head *j_commit_bh ; /* commit buffer head */
5677 struct reiserfs_journal_cnode *j_realblock ;
5678 struct reiserfs_journal_cnode *j_freedlist ; /* list of buffers that were freed during this trans. free each of these on flush */
5679- wait_queue_head_t j_commit_wait ; /* wait for all the commit blocks to be flushed */
5680- wait_queue_head_t j_flush_wait ; /* wait for all the real blocks to be flushed */
5681+
5682+ /* time ordered list of all the active transactions */
5683+ struct list_head j_list;
5684+
5685+ /* time ordered list of all transactions not touched by kreiserfsd */
5686+ struct list_head j_working_list;
5687+
5688+ /* for data=ordered support */
5689+ struct list_head j_ordered_bh_list;
5690+
5691+ /* sigh, the tails have slightly different rules for flushing, they
5692+ * need their own list
5693+ */
5694+ struct list_head j_tail_bh_list;
5695 } ;
5696
5697 struct reiserfs_page_list ; /* defined in reiserfs_fs.h */
5698@@ -230,16 +248,11 @@
5699 unsigned long j_last_flush_trans_id ; /* last fully flushed journal timestamp */
5700 struct buffer_head *j_header_bh ;
5701
5702- /* j_flush_pages must be flushed before the current transaction can
5703- ** commit
5704- */
5705- struct reiserfs_page_list *j_flush_pages ;
5706 time_t j_trans_start_time ; /* time this transaction started */
5707- wait_queue_head_t j_wait ; /* wait journal_end to finish I/O */
5708- atomic_t j_wlock ; /* lock for j_wait */
5709+ struct semaphore j_lock ;
5710+ struct semaphore j_flush_sem ;
5711 wait_queue_head_t j_join_wait ; /* wait for current transaction to finish before starting new one */
5712 atomic_t j_jlock ; /* lock for j_join_wait */
5713- int j_journal_list_index ; /* journal list number of the current trans */
5714 int j_list_bitmap_index ; /* number of next list bitmap to use */
5715 int j_must_wait ; /* no more journal begins allowed. MUST sleep on j_join_wait */
5716 int j_next_full_flush ; /* next journal_end will flush all journal list */
5717@@ -255,13 +268,28 @@
5718
5719 struct reiserfs_journal_cnode *j_cnode_free_list ;
5720 struct reiserfs_journal_cnode *j_cnode_free_orig ; /* orig pointer returned from vmalloc */
5721+ struct reiserfs_journal_list *j_current_jl;
5722
5723 int j_free_bitmap_nodes ;
5724 int j_used_bitmap_nodes ;
5725+ int j_num_lists; /* total number of active transactions */
5726+ int j_num_work_lists; /* number that need attention from kreiserfsd */
5727+
5728+ /* debugging to make sure things are flushed in order */
5729+ int j_last_flush_id;
5730+
5731+ /* debugging to make sure things are committed in order */
5732+ int j_last_commit_id;
5733+
5734 struct list_head j_bitmap_nodes ;
5735- struct list_head j_dirty_buffers ;
5736+
5737+ /* list of all active transactions */
5738+ struct list_head j_journal_list;
5739+
5740+ /* lists that haven't been touched by kreiserfsd */
5741+ struct list_head j_working_list;
5742+
5743 struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS] ; /* array of bitmaps to record the deleted blocks */
5744- struct reiserfs_journal_list j_journal_list[JOURNAL_LIST_COUNT] ; /* array of all the journal lists */
5745 struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for real buffer heads in current trans */
5746 struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for all the real buffer heads in all
5747 the transactions */
5748@@ -413,6 +441,7 @@
5749 reiserfs_proc_info_data_t s_proc_info_data;
5750 struct proc_dir_entry *procdir;
5751 int reserved_blocks; /* amount of blocks reserved for further allocations */
5752+ struct list_head s_reiserfs_supers;
5753 };
5754
5755 /* Definitions of reiserfs on-disk properties: */
5756@@ -420,11 +449,12 @@
5757 #define REISERFS_3_6 1
5758
5759 /* Mount options */
5760-#define REISERFS_LARGETAIL 0 /* large tails will be created in a session */
5761-#define REISERFS_SMALLTAIL 17 /* small (for files less than block size) tails will be created in a session */
5762-#define REPLAYONLY 3 /* replay journal and return 0. Use by fsck */
5763-#define REISERFS_NOLOG 4 /* -o nolog: turn journalling off */
5764-#define REISERFS_CONVERT 5 /* -o conv: causes conversion of old
5765+enum {
5766+ REISERFS_LARGETAIL, /* large tails will be created in a session */
5767+ REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */
5768+ REPLAYONLY, /* replay journal and return 0. Used by fsck */
5769+ REISERFS_NOLOG, /* -o nolog: turn journalling off */
5770+ REISERFS_CONVERT, /* -o conv: causes conversion of old
5771 format super block to the new
5772 format. If not specified - old
5773 partition will be dealt with in a
5774@@ -438,27 +468,25 @@
5775 ** the existing hash on the FS, so if you have a tea hash disk, and mount
5776 ** with -o hash=rupasov, the mount will fail.
5777 */
5778-#define FORCE_TEA_HASH 6 /* try to force tea hash on mount */
5779-#define FORCE_RUPASOV_HASH 7 /* try to force rupasov hash on mount */
5780-#define FORCE_R5_HASH 8 /* try to force rupasov hash on mount */
5781-#define FORCE_HASH_DETECT 9 /* try to detect hash function on mount */
5782+ FORCE_TEA_HASH, /* try to force tea hash on mount */
5783+ FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */
5784+ FORCE_R5_HASH, /* try to force r5 hash on mount */
5785+ FORCE_HASH_DETECT, /* try to detect hash function on mount */
5786
5787
5788 /* used for testing experimental features, makes benchmarking new
5789 features with and without more convenient, should never be used by
5790 users in any code shipped to users (ideally) */
5791
5792-#define REISERFS_NO_BORDER 11
5793-#define REISERFS_NO_UNHASHED_RELOCATION 12
5794-#define REISERFS_HASHED_RELOCATION 13
5795-#define REISERFS_TEST4 14
5796-
5797-#define REISERFS_TEST1 11
5798-#define REISERFS_TEST2 12
5799-#define REISERFS_TEST3 13
5800-#define REISERFS_TEST4 14
5801-
5802-#define REISERFS_ATTRS (15)
5803+ REISERFS_NO_BORDER,
5804+ REISERFS_NO_UNHASHED_RELOCATION,
5805+ REISERFS_HASHED_RELOCATION,
5806+ REISERFS_DATA_LOG,
5807+ REISERFS_DATA_ORDERED,
5808+ REISERFS_DATA_WRITEBACK,
5809+ REISERFS_ATTRS,
5810+ REISERFS_TEST4,
5811+};
5812
5813 #define reiserfs_r5_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_R5_HASH))
5814 #define reiserfs_rupasov_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_RUPASOV_HASH))
5815@@ -467,6 +495,9 @@
5816 #define reiserfs_no_border(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_BORDER))
5817 #define reiserfs_no_unhashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION))
5818 #define reiserfs_hashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_HASHED_RELOCATION))
5819+#define reiserfs_data_log(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_DATA_LOG))
5820+#define reiserfs_data_ordered(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_DATA_ORDERED))
5821+#define reiserfs_data_writeback(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_DATA_WRITEBACK))
5822 #define reiserfs_test4(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_TEST4))
5823
5824 #define have_large_tails(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_LARGETAIL))
5825@@ -480,8 +511,6 @@
5826
5827 void reiserfs_file_buffer (struct buffer_head * bh, int list);
5828 int reiserfs_is_super(struct super_block *s) ;
5829-int journal_mark_dirty(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ;
5830-int flush_old_commits(struct super_block *s, int) ;
5831 int show_reiserfs_locks(void) ;
5832 int reiserfs_resize(struct super_block *, unsigned long) ;
5833
5834@@ -492,8 +521,6 @@
5835 #define SB_BUFFER_WITH_SB(s) ((s)->u.reiserfs_sb.s_sbh)
5836 #define SB_JOURNAL(s) ((s)->u.reiserfs_sb.s_journal)
5837 #define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block)
5838-#define SB_JOURNAL_LIST(s) (SB_JOURNAL(s)->j_journal_list)
5839-#define SB_JOURNAL_LIST_INDEX(s) (SB_JOURNAL(s)->j_journal_list_index)
5840 #define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free)
5841 #define SB_AP_BITMAP(s) ((s)->u.reiserfs_sb.s_ap_bitmap)
5842
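The superblock hunks above drop the fixed j_journal_list[JOURNAL_LIST_COUNT] array in favor of dynamically allocated journal lists threaded onto j_journal_list, with j_current_jl and j_num_lists tracking them. In miniature, with a plain pointer chain standing in for list_head:

#include <stdio.h>
#include <stdlib.h>

struct journal_list {
    unsigned long trans_id;
    struct journal_list *next;
};

struct journal {
    struct journal_list *current_jl;  /* j_current_jl */
    struct journal_list *all;         /* j_journal_list, newest first here */
    int num_lists;                    /* j_num_lists */
};

static struct journal_list *new_transaction(struct journal *j, unsigned long id)
{
    struct journal_list *jl = calloc(1, sizeof(*jl));

    if (!jl)
        return NULL;
    jl->trans_id = id;
    jl->next = j->all;                /* no fixed 64-entry limit any more */
    j->all = jl;
    j->current_jl = jl;
    j->num_lists++;
    return jl;
}

int main(void)
{
    struct journal j = { 0 };

    for (unsigned long id = 1; id <= 3; id++)
        new_transaction(&j, id);
    printf("active transactions: %d, current id: %lu\n",
           j.num_lists, j.current_jl->trans_id);
    return 0;
}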
5843diff -urN linux-2.4.22.org/kernel/ksyms.c linux-2.4.22/kernel/ksyms.c
5844--- linux-2.4.22.org/kernel/ksyms.c 2003-11-21 15:08:31.000000000 +0100
5845+++ linux-2.4.22/kernel/ksyms.c 2003-11-21 15:15:21.000000000 +0100
5846@@ -182,6 +182,7 @@
5847 EXPORT_SYMBOL(end_buffer_io_async);
5848 EXPORT_SYMBOL(__mark_buffer_dirty);
5849 EXPORT_SYMBOL(__mark_inode_dirty);
5850+EXPORT_SYMBOL(discard_buffer); /* for FS flushpage funcs */
5851 EXPORT_SYMBOL(fd_install);
5852 EXPORT_SYMBOL(get_empty_filp);
5853 EXPORT_SYMBOL(init_private_file);
5854diff -urN linux-2.4.22.org/mm/filemap.c linux-2.4.22/mm/filemap.c
5855--- linux-2.4.22.org/mm/filemap.c 2003-11-21 15:08:31.000000000 +0100
5856+++ linux-2.4.22/mm/filemap.c 2003-11-21 15:14:25.000000000 +0100
5857@@ -3041,6 +3041,14 @@
5858 }
5859 }
5860
5861+static void update_inode_times(struct inode *inode)
5862+{
5863+ time_t now = CURRENT_TIME;
5864+ if (inode->i_ctime != now || inode->i_mtime != now) {
5865+ inode->i_ctime = inode->i_mtime = now;
5866+ mark_inode_dirty_sync(inode);
5867+ }
5868+}
5869 /*
5870 * precheck_file_write():
5871 * Check the conditions on a file descriptor prior to beginning a write
5872@@ -3302,8 +3310,7 @@
5873 BUG();
5874
5875 remove_suid(inode);
5876- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
5877- mark_inode_dirty_sync(inode);
5878+ update_inode_times(inode);
5879
5880 written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
5881 if (written > 0) {
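The update_inode_times helper added to mm/filemap.c only dirties the inode when the second-granularity timestamps actually change, saving a mark_inode_dirty_sync per write for back-to-back writes within one second. A userspace equivalent:

#include <stdio.h>
#include <time.h>

struct inode { time_t ctime, mtime; int dirty_count; };  /* stand-in struct */

static void update_inode_times(struct inode *inode)
{
    time_t now = time(NULL);

    if (inode->ctime != now || inode->mtime != now) {
        inode->ctime = inode->mtime = now;
        inode->dirty_count++;   /* stands in for mark_inode_dirty_sync() */
    }
}

int main(void)
{
    struct inode i = { 0, 0, 0 };

    update_inode_times(&i);
    update_inode_times(&i);     /* same second: inode not dirtied again */
    printf("dirtied %d time(s)\n", i.dirty_count);  /* prints 1 */
    return 0;
}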